From b8be80cb8b275ce440cfc960dfa0fb13e3192f90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 22 Mar 2025 00:48:41 +0100 Subject: [PATCH 01/24] transformer overhaul --- hcl2/api.py | 2 +- hcl2/{transformer.py => dict_transformer.py} | 4 + hcl2/rule_transformer.py | 101 ++++ hcl2/serialization.py | 496 +++++++++++++++++++ test/helpers/hcl2_helper.py | 2 +- test/unit/test_dict_transformer.py | 2 +- 6 files changed, 604 insertions(+), 3 deletions(-) rename hcl2/{transformer.py => dict_transformer.py} (99%) create mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/serialization.py diff --git a/hcl2/api.py b/hcl2/api.py index 399ba929..1cec02a2 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -3,7 +3,7 @@ from lark.tree import Tree from hcl2.parser import parser, reconstruction_parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer diff --git a/hcl2/transformer.py b/hcl2/dict_transformer.py similarity index 99% rename from hcl2/transformer.py rename to hcl2/dict_transformer.py index 382092d6..64c58bcb 100644 --- a/hcl2/transformer.py +++ b/hcl2/dict_transformer.py @@ -277,6 +277,10 @@ def heredoc_template_trim(self, args: List) -> str: def new_line_or_comment(self, args: List) -> _DiscardType: return Discard + # def EQ(self, args: List): + # print("EQ", args) + # return args + def for_tuple_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py new file mode 100644 index 00000000..8f0b922a --- /dev/null +++ b/hcl2/rule_transformer.py @@ -0,0 +1,101 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.visitors import _Leaf_T, _Return_T, Discard + +from hcl2.serialization import ( + LarkRule, + 
LarkToken, + StartRule, + BodyRule, + BlockRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + ExprTermRule, + ConditionalRule, + BinaryOpRule, + BinaryOperatorRule, + BinaryTermRule, + UnaryOpRule, + AttributeRule, + NewLineOrCommentRule, +) + +ArgsType = List[Union[Token, Tree]] + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def start(self, args: ArgsType) -> StartRule: + return StartRule(args) + + def body(self, args: ArgsType) -> BodyRule: + return BodyRule(args) + + def block(self, args: ArgsType) -> BlockRule: + return BlockRule(args) + + def identifier(self, args: ArgsType) -> IdentifierRule: + return IdentifierRule(args) + + def int_lit(self, args: ArgsType) -> IntLitRule: + return IntLitRule(args) + + def float_lit(self, args: ArgsType) -> FloatLitRule: + return FloatLitRule(args) + + def string_lit(self, args: ArgsType) -> StringLitRule: + return StringLitRule(args) + + def expr_term(self, args: ArgsType) -> ExprTermRule: + return ExprTermRule(args) + + def conditional(self, args: ArgsType) -> ConditionalRule: + return ConditionalRule(args) + + def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: + return BinaryOperatorRule(args) + + def binary_term(self, args: ArgsType) -> BinaryTermRule: + return BinaryTermRule(args) + + def unary_op(self, args: ArgsType) -> UnaryOpRule: + return UnaryOpRule(args) + + def binary_op(self, args: ArgsType) -> BinaryOpRule: + return BinaryOpRule(args) + + def 
attribute(self, args: ArgsType) -> AttributeRule: + return AttributeRule(args) + + def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args) + return Discard + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) diff --git a/hcl2/serialization.py b/hcl2/serialization.py new file mode 100644 index 00000000..15d75caa --- /dev/null +++ b/hcl2/serialization.py @@ -0,0 +1,496 @@ +from abc import ABC, abstractmethod +from json import JSONEncoder +from typing import List, Any, Union, Tuple, Optional + +from lark import Tree, Token + +ArgsType = List["LarkElement"] + + +def is_dollar_string(value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + + return f"({value})" + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return 
Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + +EQ_Token = LarkToken + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + _classes = [] + + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children): + self._children: List[LarkElement] = children + + def __init_subclass__(cls, **kwargs): + cls._classes.append(cls) + + def __repr__(self): + return f"" + + +class StartRule(LarkRule): + + _children: Tuple["BodyRule"] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> "BodyRule": + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + "NewLineOrCommentRule", + "AttributeRule", + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for 
attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class BlockRule(LarkRule): + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List["IdentifierRule"]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result + + +class IdentifierRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "identifier" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IntLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "int_lit" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class FloatLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "float_lit" + + def __init__(self, children): + print("float_lit", children) + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class StringLitRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> Any: + return 
TokenSequence(self._children).joined()[1:-1] + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self._parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(ExprTermRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children): + super().__init__(children) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? 
{self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryOperatorRule(LarkRule): + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + +class BinaryTermRule(LarkRule): + _children: Tuple[ + BinaryOperatorRule, + Optional["NewLineOrCommentRule"], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children): + if len(children) == 2: + children.insert(1, None) + super().__init__(children) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional["NewLineOrCommentRule"]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class UnaryOpRule(Expression): + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def rule_name() -> str: + return "unary_op" + + @property + def unary_operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + "NewLineOrCommentRule", + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = 
self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py index 5acee1e7..c39ee7fb 100644 --- a/test/helpers/hcl2_helper.py +++ b/test/helpers/hcl2_helper.py @@ -3,7 +3,7 @@ from lark import Tree from hcl2.parser import parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class Hcl2Helper: diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py index 122332eb..baad5ba9 100644 --- a/test/unit/test_dict_transformer.py +++ b/test/unit/test_dict_transformer.py @@ -2,7 +2,7 @@ from unittest import TestCase -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class TestDictTransformer(TestCase): From e39b42918b4f6dca5694bd836faa5ee649b8e560 Mon Sep 17 00:00:00 
2001 From: Kamil Kozik Date: Wed, 26 Mar 2025 21:28:54 +0100 Subject: [PATCH 02/24] reorganize code --- hcl2/rule_transformer.py | 101 ---- hcl2/rule_transformer/__init__.py | 0 hcl2/rule_transformer/json.py | 12 + hcl2/rule_transformer/rules/__init__.py | 0 hcl2/rule_transformer/rules/abstract.py | 93 ++++ hcl2/rule_transformer/rules/base.py | 122 +++++ hcl2/rule_transformer/rules/expression.py | 187 +++++++ hcl2/rule_transformer/rules/token_sequence.py | 63 +++ hcl2/rule_transformer/rules/whitespace.py | 36 ++ hcl2/rule_transformer/transformer.py | 118 +++++ hcl2/rule_transformer/utils.py | 23 + hcl2/serialization.py | 496 ------------------ 12 files changed, 654 insertions(+), 597 deletions(-) delete mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/rule_transformer/__init__.py create mode 100644 hcl2/rule_transformer/json.py create mode 100644 hcl2/rule_transformer/rules/__init__.py create mode 100644 hcl2/rule_transformer/rules/abstract.py create mode 100644 hcl2/rule_transformer/rules/base.py create mode 100644 hcl2/rule_transformer/rules/expression.py create mode 100644 hcl2/rule_transformer/rules/token_sequence.py create mode 100644 hcl2/rule_transformer/rules/whitespace.py create mode 100644 hcl2/rule_transformer/transformer.py create mode 100644 hcl2/rule_transformer/utils.py delete mode 100644 hcl2/serialization.py diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py deleted file mode 100644 index 8f0b922a..00000000 --- a/hcl2/rule_transformer.py +++ /dev/null @@ -1,101 +0,0 @@ -# pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - -from lark import Transformer, Tree, Token -from lark.visitors import _Leaf_T, _Return_T, Discard - -from hcl2.serialization import ( - LarkRule, - LarkToken, - StartRule, - BodyRule, - BlockRule, - IdentifierRule, - IntLitRule, - FloatLitRule, - StringLitRule, - ExprTermRule, - ConditionalRule, - BinaryOpRule, - BinaryOperatorRule, - BinaryTermRule, - 
UnaryOpRule, - AttributeRule, - NewLineOrCommentRule, -) - -ArgsType = List[Union[Token, Tree]] - - -class RuleTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a tree of LarkRule instances - """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments - super().__init__() - - def start(self, args: ArgsType) -> StartRule: - return StartRule(args) - - def body(self, args: ArgsType) -> BodyRule: - return BodyRule(args) - - def block(self, args: ArgsType) -> BlockRule: - return BlockRule(args) - - def identifier(self, args: ArgsType) -> IdentifierRule: - return IdentifierRule(args) - - def int_lit(self, args: ArgsType) -> IntLitRule: - return IntLitRule(args) - - def float_lit(self, args: ArgsType) -> FloatLitRule: - return FloatLitRule(args) - - def string_lit(self, args: ArgsType) -> StringLitRule: - return StringLitRule(args) - - def expr_term(self, args: ArgsType) -> ExprTermRule: - return ExprTermRule(args) - - def conditional(self, args: ArgsType) -> ConditionalRule: - return ConditionalRule(args) - - def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: - return BinaryOperatorRule(args) - - def binary_term(self, args: ArgsType) -> BinaryTermRule: - return BinaryTermRule(args) - - def unary_op(self, args: ArgsType) -> UnaryOpRule: - return UnaryOpRule(args) - - def binary_op(self, args: ArgsType) -> BinaryOpRule: - return BinaryOpRule(args) - - def attribute(self, args: ArgsType) -> AttributeRule: - return AttributeRule(args) - - def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: - if self._with_comments: - return 
NewLineOrCommentRule(args) - return Discard - - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) - - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rule_transformer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py new file mode 100644 index 00000000..647b6683 --- /dev/null +++ b/hcl2/rule_transformer/json.py @@ -0,0 +1,12 @@ +from json import JSONEncoder +from typing import Any + +from hcl2.rule_transformer.rules.abstract import LarkRule + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py new file mode 100644 index 00000000..37f63a03 --- /dev/null +++ b/hcl2/rule_transformer/rules/abstract.py @@ -0,0 +1,93 @@ +from abc import ABC, abstractmethod +from typing import Any, Union, List, Optional + +from lark import Token, Tree +from lark.tree import Meta + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + 
+EQ_Token = LarkToken +COLON_TOKEN = LarkToken +LPAR_TOKEN = LarkToken # left parenthesis +RPAR_TOKEN = LarkToken # right parenthesis + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + @property + def children(self) -> List[LarkElement]: + return self._children + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children, meta: Optional[Meta] = None): + self._children = children + self._meta = meta + + def __repr__(self): + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py new file mode 100644 index 00000000..f46d8039 --- /dev/null +++ b/hcl2/rule_transformer/rules/base.py @@ -0,0 +1,122 @@ +from typing import Tuple, Any, List, Union, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.token_sequence import IdentifierRule + +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return 
self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + NewLineOrCommentRule, + AttributeRule, + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class StartRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> BodyRule: + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BlockRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List[IdentifierRule]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result diff --git a/hcl2/rule_transformer/rules/expression.py 
b/hcl2/rule_transformer/rules/expression.py new file mode 100644 index 00000000..2a38912a --- /dev/null +++ b/hcl2/rule_transformer/rules/expression.py @@ -0,0 +1,187 @@ +from abc import ABC +from typing import Any, Tuple, Optional, List + +from lark import Tree, Token +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import ( + LarkRule, + LarkToken, + LPAR_TOKEN, + RPAR_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.utils import ( + wrap_into_parentheses, + to_dollar_string, + unwrap_dollar_string, +) + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + + _children: Tuple[ + Optional[LPAR_TOKEN], + Optional[NewLineOrCommentRule], + Expression, + Optional[NewLineOrCommentRule], + Optional[RPAR_TOKEN], + ] + + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children, meta: Optional[Meta] = None): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children, meta) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self.parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(LarkRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def 
condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryTermRule(LarkRule): + + _children: Tuple[ + BinaryOperatorRule, + Optional[NewLineOrCommentRule], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children, meta: Optional[Meta] = None): + if len(children) == 2: + children.insert(1, None) + super().__init__(children, meta) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional[NewLineOrCommentRule]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + NewLineOrCommentRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class UnaryOpRule(Expression): + + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def 
rule_name() -> str: + return "unary_op" + + @property + def operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py new file mode 100644 index 00000000..66e22e2f --- /dev/null +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -0,0 +1,63 @@ +from abc import ABC +from typing import Tuple, Any, List, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken + + +class TokenSequenceRule(LarkRule, ABC): + + _children: Tuple[TokenSequence] + + def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IdentifierRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "identifier" + + def serialize(self) -> str: + return str(super().serialize()) + + +class IntLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "int_lit" + + def serialize(self) -> float: + return int(super().serialize()) + + +class FloatLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "float_lit" + + def serialize(self) -> float: + return float(super().serialize()) + + +class StringLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> str: + return str(super().serialize()) + + +class BinaryOperatorRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> str: + return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py new file mode 
100644 index 00000000..f56a386e --- /dev/null +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -0,0 +1,36 @@ +from typing import Optional, List, Any + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py new file mode 100644 index 00000000..9e6af6ef --- /dev/null +++ b/hcl2/rule_transformer/transformer.py @@ -0,0 +1,118 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.tree import Meta +from lark.visitors import _Leaf_T, Discard, v_args + +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.base import ( + StartRule, + BodyRule, + BlockRule, + AttributeRule, +) +from hcl2.rule_transformer.rules.expression import ( + BinaryTermRule, + ConditionalRule, + ExprTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rule_transformer.rules.token_sequence import ( + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + 
with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) + + @v_args(meta=True) + def start(self, meta: Meta, args) -> StartRule: + return StartRule(args, meta) + + @v_args(meta=True) + def body(self, meta: Meta, args) -> BodyRule: + return BodyRule(args, meta) + + @v_args(meta=True) + def block(self, meta: Meta, args) -> BlockRule: + return BlockRule(args, meta) + + @v_args(meta=True) + def identifier(self, meta: Meta, args) -> IdentifierRule: + return IdentifierRule(args, meta) + + @v_args(meta=True) + def int_lit(self, meta: Meta, args) -> IntLitRule: + return IntLitRule(args, meta) + + @v_args(meta=True) + def float_lit(self, meta: Meta, args) -> FloatLitRule: + return FloatLitRule(args, meta) + + @v_args(meta=True) + def string_lit(self, meta: Meta, args) -> StringLitRule: + return StringLitRule(args, meta) + + @v_args(meta=True) + def expr_term(self, meta: Meta, args) -> ExprTermRule: + return ExprTermRule(args, meta) + + @v_args(meta=True) + def conditional(self, meta: Meta, args) -> ConditionalRule: + return ConditionalRule(args, meta) + + @v_args(meta=True) + def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: + return BinaryOperatorRule(args, meta) + + @v_args(meta=True) + def binary_term(self, meta: Meta, args) -> BinaryTermRule: + return BinaryTermRule(args, meta) + + @v_args(meta=True) + def unary_op(self, meta: Meta, args) -> UnaryOpRule: + return UnaryOpRule(args, meta) + + 
@v_args(meta=True) + def binary_op(self, meta: Meta, args) -> BinaryOpRule: + return BinaryOpRule(args, meta) + + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args, meta) + return Discard diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py new file mode 100644 index 00000000..060d3b53 --- /dev/null +++ b/hcl2/rule_transformer/utils.py @@ -0,0 +1,23 @@ +def is_dollar_string(value: str) -> bool: + if not isinstance(value, str): + return False + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + return f"({value})" diff --git a/hcl2/serialization.py b/hcl2/serialization.py deleted file mode 100644 index 15d75caa..00000000 --- a/hcl2/serialization.py +++ /dev/null @@ -1,496 +0,0 @@ -from abc import ABC, abstractmethod -from json import JSONEncoder -from typing import List, Any, Union, Tuple, Optional - -from lark import Tree, Token - -ArgsType = List["LarkElement"] - - -def is_dollar_string(value: str) -> bool: - return value.startswith("${") and value.endswith("}") - - -def to_dollar_string(value: str) -> str: - if not is_dollar_string(value): - return f"${{{value}}}" - return value - - -def unwrap_dollar_string(value: str) -> str: - if is_dollar_string(value): - return value[2:-1] - return value - - -def wrap_into_parentheses(value: str) -> str: - if is_dollar_string(value): - value = unwrap_dollar_string(value) - return 
to_dollar_string(f"({value})") - - return f"({value})" - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) - - -class LarkElement(ABC): - @abstractmethod - def tree(self) -> Token: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name - self._value = value - - @property - def name(self) -> str: - return self._name - - @property - def value(self): - return self._value - - def serialize(self) -> Any: - return self._value - - def tree(self) -> Token: - return Token(self.name, self.value) - - def __str__(self) -> str: - return str(self._value) - - def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken - - -class TokenSequence: - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] - - def joined(self): - return "".join(str(token) for token in self.tokens) - - -class LarkRule(ABC): - _classes = [] - - @staticmethod - @abstractmethod - def rule_name() -> str: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - def tree(self) -> Tree: - result_children = [] - for child in self._children: - if child is None: - continue - - if isinstance(child, TokenSequence): - result_children.extend(child.tree()) - else: - result_children.append(child.tree()) - - return Tree(self.rule_name(), result_children) - - def __init__(self, children): - self._children: List[LarkElement] = children - - def __init_subclass__(cls, **kwargs): - cls._classes.append(cls) - - def __repr__(self): - return f"" - - -class StartRule(LarkRule): - - _children: Tuple["BodyRule"] - - @staticmethod - def rule_name() -> str: - return "start" - - @property - def body(self) 
-> "BodyRule": - return self._children[0] - - def serialize(self) -> Any: - return self.body.serialize() - - -class BodyRule(LarkRule): - - _children: List[ - Union[ - "NewLineOrCommentRule", - "AttributeRule", - "BlockRule", - ] - ] - - @staticmethod - def rule_name() -> str: - return "body" - - def serialize(self) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] - comments = [] - - for child in self._children: - if isinstance(child, BlockRule): - blocks.append(child) - if isinstance(child, AttributeRule): - attributes.append(child) - if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() - if child_comments: - comments.extend(child_comments) - - result = {} - - for attribute in attributes: - result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} - ) - - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) - - if comments: - result["__comments__"] = comments - - return result - - -class BlockRule(LarkRule): - @staticmethod - def rule_name() -> str: - return "block" - - def __init__(self, children): - super().__init__(children) - *self._labels, self._body = children - - @property - def labels(self) -> List["IdentifierRule"]: - return list(filter(lambda label: label is not None, self._labels)) - - @property - def body(self) -> BodyRule: - return self._body - - def serialize(self) -> BodyRule: - result = self._body.serialize() - labels = self._labels - for label in reversed(labels[1:]): - result = {label.serialize(): result} - return result - - -class IdentifierRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "identifier" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class IntLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def 
rule_name() -> str: - return "int_lit" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class FloatLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "float_lit" - - def __init__(self, children): - print("float_lit", children) - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class StringLitRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "STRING_LIT" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined()[1:-1] - - -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: - return "expression" - - -class ExprTermRule(Expression): - @staticmethod - def rule_name() -> str: - return "expr_term" - - def __init__(self, children): - self._parentheses = False - if ( - isinstance(children[0], LarkToken) - and children[0].name == "LPAR" - and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" - ): - self._parentheses = True - children = children[1:-1] - super().__init__(children) - - @property - def parentheses(self) -> bool: - return self._parentheses - - def serialize(self) -> Any: - result = self._children[0].serialize() - if self._parentheses: - result = wrap_into_parentheses(result) - result = to_dollar_string(result) - return result - - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - - -class ConditionalRule(ExprTermRule): - - _children: Tuple[ - Expression, - Expression, - Expression, - ] - - @staticmethod - def rule_name(): - return "conditional" - - @property - def condition(self) -> Expression: - return self._children[0] - - @property - def if_true(self) -> 
Expression: - return self._children[1] - - @property - def if_false(self) -> Expression: - return self._children[2] - - def __init__(self, children): - super().__init__(children) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" - return to_dollar_string(result) - - -class BinaryOperatorRule(LarkRule): - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "binary_operator" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - -class BinaryTermRule(LarkRule): - _children: Tuple[ - BinaryOperatorRule, - Optional["NewLineOrCommentRule"], - ExprTermRule, - ] - - @staticmethod - def rule_name() -> str: - return "binary_term" - - def __init__(self, children): - if len(children) == 2: - children.insert(1, None) - super().__init__(children) - - @property - def binary_operator(self) -> BinaryOperatorRule: - return self._children[0] - - @property - def comment(self) -> Optional["NewLineOrCommentRule"]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - - @property - def expr_term(self) -> ExprTermRule: - return self._children[2] - - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" - - -class UnaryOpRule(Expression): - _children: Tuple[LarkToken, ExprTermRule] - - @staticmethod - def rule_name() -> str: - return "unary_op" - - @property - def unary_operator(self) -> str: - return str(self._children[0]) - - @property - def expr_term(self): - return self._children[1] - - def serialize(self) -> Any: - return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") - - -class BinaryOpRule(Expression): - _children: Tuple[ - ExprTermRule, - BinaryTermRule, - "NewLineOrCommentRule", - ] - - @staticmethod - def rule_name() -> str: - return "binary_op" - - @property - def expr_term(self) -> ExprTermRule: - return 
self._children[0] - - @property - def binary_term(self) -> BinaryTermRule: - return self._children[1] - - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") - - -class AttributeRule(LarkRule): - _children: Tuple[ - IdentifierRule, - EQ_Token, - Expression, - ] - - @staticmethod - def rule_name() -> str: - return "attribute" - - @property - def identifier(self) -> IdentifierRule: - return self._children[0] - - @property - def expression(self) -> Expression: - return self._children[2] - - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} - - -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "new_line_or_comment" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() - if comment == "\n": - return None - - comment = comment.strip() - comments = comment.split("\n") - - result = [] - for comment in comments: - if comment.startswith("//"): - comment = comment[2:] - - elif comment.startswith("#"): - comment = comment[1:] - - if comment != "": - result.append(comment.strip()) - - return result From d9c2eca1f99a7edf9b6e16603755c5113dc8a8d7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Apr 2025 16:19:11 +0200 Subject: [PATCH 03/24] batch of different changes --- hcl2/rule_transformer/rules/abstract.py | 32 +++--- hcl2/rule_transformer/rules/base.py | 55 ++++++--- hcl2/rule_transformer/rules/expression.py | 104 +++++++++++------- hcl2/rule_transformer/rules/token_sequence.py | 31 +++--- hcl2/rule_transformer/rules/whitespace.py | 11 +- hcl2/rule_transformer/transformer.py | 12 +- hcl2/rule_transformer/utils.py | 9 ++ 7 files changed, 152 
insertions(+), 102 deletions(-) diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 37f63a03..6c650ea3 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -4,14 +4,16 @@ from lark import Token, Tree from lark.tree import Meta +from hcl2.rule_transformer.utils import SerializationOptions + class LarkElement(ABC): @abstractmethod - def tree(self) -> Token: + def reverse(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @@ -28,10 +30,10 @@ def name(self) -> str: def value(self): return self._value - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self._value - def tree(self) -> Token: + def reverse(self) -> Token: return Token(self.name, self.value) def __str__(self) -> str: @@ -47,45 +49,45 @@ def __repr__(self) -> str: RPAR_TOKEN = LarkToken # right parenthesis -class TokenSequence: +class TokenSequence(LarkElement): def __init__(self, tokens: List[LarkToken]): self.tokens = tokens - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] + def reverse(self) -> List[Token]: + return [token.reverse() for token in self.tokens] - def joined(self): + def serialize(self, options: SerializationOptions = SerializationOptions()): return "".join(str(token) for token in self.tokens) -class LarkRule(ABC): +class LarkRule(LarkElement, ABC): @staticmethod @abstractmethod def rule_name() -> str: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @property def children(self) -> List[LarkElement]: return self._children - def tree(self) -> Tree: + def reverse(self) -> Tree: result_children = [] for child in 
self._children: if child is None: continue if isinstance(child, TokenSequence): - result_children.extend(child.tree()) + result_children.extend(child.reverse()) else: - result_children.append(child.tree()) + result_children.append(child.reverse()) - return Tree(self.rule_name(), result_children) + return Tree(self.rule_name(), result_children, meta=self._meta) - def __init__(self, children, meta: Optional[Meta] = None): + def __init__(self, children: List, meta: Optional[Meta] = None): self._children = children self._meta = meta diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index f46d8039..76d014e9 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import Tuple, Any, List, Union, Optional from lark.tree import Meta @@ -7,6 +8,7 @@ from hcl2.rule_transformer.rules.token_sequence import IdentifierRule from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions class AttributeRule(LarkRule): @@ -28,8 +30,8 @@ def identifier(self) -> IdentifierRule: def expression(self) -> Expression: return self._children[2] - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): @@ -46,18 +48,23 @@ class BodyRule(LarkRule): def rule_name() -> str: return "body" - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] - + inline_comments = [] for child in self._children: + if isinstance(child, BlockRule): blocks.append(child) + if isinstance(child, AttributeRule): attributes.append(child) + # collect 
in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) + if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() + child_comments = child.to_list() if child_comments: comments.extend(child_comments) @@ -65,15 +72,27 @@ def serialize(self) -> Any: for attribute in attributes: result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} + { + attribute.identifier.serialize( + options + ): attribute.expression.serialize(options) + } ) - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) + result_blocks = defaultdict(list) + for block in blocks: + name = block.labels[0].serialize(options) + if name in result.keys(): + raise RuntimeError(f"Attribute {name} is already defined.") + result_blocks[name].append(block.serialize(options)) + + result.update(**result_blocks) - if comments: - result["__comments__"] = comments + if options.with_comments: + if comments: + result["__comments__"] = comments + if inline_comments: + result["__inline_comments__"] = inline_comments return result @@ -90,8 +109,8 @@ def rule_name() -> str: def body(self) -> BodyRule: return self._children[0] - def serialize(self) -> Any: - return self.body.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.body.serialize(options) class BlockRule(LarkRule): @@ -103,7 +122,7 @@ def rule_name() -> str: return "block" def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children) + super().__init__(children, meta) *self._labels, self._body = children @property @@ -114,9 +133,11 @@ def labels(self) -> List[IdentifierRule]: def body(self) -> BodyRule: return self._body - def serialize(self) -> BodyRule: - result = self._body.serialize() + def serialize( + self, options: SerializationOptions = SerializationOptions() + ) -> BodyRule: + result = 
self._body.serialize(options) labels = self._labels for label in reversed(labels[1:]): - result = {label.serialize(): result} + result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 2a38912a..16daf310 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -16,6 +16,7 @@ wrap_into_parentheses, to_dollar_string, unwrap_dollar_string, + SerializationOptions, ) @@ -24,10 +25,35 @@ class Expression(LarkRule, ABC): def rule_name() -> str: return "expression" + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + result.extend(child.to_list()) + + elif isinstance(child, Expression): + result.extend(child.inline_comments()) + + return result + + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + class ExprTermRule(Expression): - _children: Tuple[ + type_ = Tuple[ Optional[LPAR_TOKEN], Optional[NewLineOrCommentRule], Expression, @@ -35,6 +61,8 @@ class ExprTermRule(Expression): Optional[RPAR_TOKEN], ] + _children: type_ + @staticmethod def rule_name() -> str: return "expr_term" @@ -48,34 +76,36 @@ def __init__(self, children, meta: Optional[Meta] = None): and children[-1].name == "RPAR" ): self._parentheses = True - children = children[1:-1] + else: + children = [None, *children, None] + + self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @property def parentheses(self) -> bool: return self._parentheses - def serialize(self) -> Any: - result = self._children[0].serialize() + @property + 
def expression(self) -> Expression: + return self._children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) return result - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - -class ConditionalRule(LarkRule): +class ConditionalRule(Expression): _children: Tuple[ Expression, + Optional[NewLineOrCommentRule], Expression, + Optional[NewLineOrCommentRule], + Optional[NewLineOrCommentRule], Expression, ] @@ -83,27 +113,28 @@ class ConditionalRule(LarkRule): def rule_name(): return "conditional" + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 4]) + super().__init__(children, meta) + @property def condition(self) -> Expression: return self._children[0] @property def if_true(self) -> Expression: - return self._children[1] + return self._children[2] @property def if_false(self) -> Expression: - return self._children[2] + return self._children[5] - def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children, meta) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) -class BinaryTermRule(LarkRule): +class BinaryTermRule(Expression): _children: Tuple[ BinaryOperatorRule, @@ -116,28 +147,19 @@ def rule_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - if len(children) == 2: - children.insert(1, None) + self._possibly_insert_null_comments(children, [1]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: return self._children[0] - @property - def comment(self) -> Optional[NewLineOrCommentRule]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - @property def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" class BinaryOpRule(Expression): @@ -159,10 +181,14 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + lhs = self.expr_term.serialize(options) + operator = self.binary_term.binary_operator.serialize(options) + rhs = self.binary_term.expr_term.serialize(options) + # below line is to avoid dollar string nested inside another dollar string, e.g.: + # hcl2: 15 + (10 * 12) + # desired json: "${15 + (10 * 12)}" + # undesired json: "${15 + ${(10 * 12)}}" rhs = unwrap_dollar_string(rhs) return to_dollar_string(f"{lhs} {operator} {rhs}") @@ -183,5 +209,5 @@ def operator(self) -> str: def 
expr_term(self): return self._children[1] - def serialize(self) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 66e22e2f..174e2510 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,9 +1,10 @@ from abc import ABC -from typing import Tuple, Any, List, Optional +from typing import Tuple, Any, List, Optional, Type from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions class TokenSequenceRule(LarkRule, ABC): @@ -12,10 +13,13 @@ class TokenSequenceRule(LarkRule, ABC): def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): children = [TokenSequence(children)] - super().__init__(children) + super().__init__(children, meta) - def serialize(self) -> Any: - return self._children[0].joined() + def serialized_type(self) -> Type: + return str + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.serialized_type()(self._children[0].serialize(options)) class IdentifierRule(TokenSequenceRule): @@ -23,17 +27,14 @@ class IdentifierRule(TokenSequenceRule): def rule_name() -> str: return "identifier" - def serialize(self) -> str: - return str(super().serialize()) - class IntLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "int_lit" - def serialize(self) -> float: - return int(super().serialize()) + def serialized_type(self) -> Type: + return int class FloatLitRule(TokenSequenceRule): @@ -41,23 +42,19 @@ class FloatLitRule(TokenSequenceRule): def rule_name() -> str: return "float_lit" - def serialize(self) 
-> float: - return float(super().serialize()) + def serialized_type(self) -> Type: + return float class StringLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: + # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; + # nevertheless, try to change it to a rule in hcl2.lark return "STRING_LIT" - def serialize(self) -> str: - return str(super().serialize()) - class BinaryOperatorRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "binary_operator" - - def serialize(self) -> str: - return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index f56a386e..b37cedc4 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,6 +1,7 @@ from typing import Optional, List, Any from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.utils import SerializationOptions class NewLineOrCommentRule(LarkRule): @@ -11,11 +12,13 @@ class NewLineOrCommentRule(LarkRule): def rule_name() -> str: return "new_line_or_comment" - def serialize(self) -> Any: - return TokenSequence(self._children).joined() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return TokenSequence(self._children).serialize(options) - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() + def to_list( + self, options: SerializationOptions = SerializationOptions() + ) -> Optional[List[str]]: + comment = self.serialize(options) if comment == "\n": return None diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 9e6af6ef..1c7d6157 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -40,13 +40,7 @@ class RuleTransformer(Transformer): def is_type_keyword(value: str) -> bool: return value in {"bool", "number", "string"} - def 
__init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments + def __init__(self): super().__init__() def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: @@ -113,6 +107,4 @@ def attribute(self, meta: Meta, args) -> AttributeRule: @v_args(meta=True) def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - if self._with_comments: - return NewLineOrCommentRule(args, meta) - return Discard + return NewLineOrCommentRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 060d3b53..e083d628 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,3 +1,12 @@ +from dataclasses import dataclass + + +@dataclass +class SerializationOptions: + with_comments: bool = True + with_meta: bool = False + + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): return False From 448ffd42050489eb92bbc5855a0905b04436c51f Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 4 Apr 2025 10:29:47 +0200 Subject: [PATCH 04/24] comments --- hcl2/rule_transformer/rules/whitespace.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index b37cedc4..96fe7c91 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -22,16 +22,19 @@ def to_list( if comment == "\n": return None - comment = comment.strip() comments = comment.split("\n") result = [] for comment in comments: - if comment.startswith("//"): - comment = comment[2:] + comment = comment.strip() - elif comment.startswith("#"): - comment = comment[1:] + for delimiter in ("//", "/*", "#"): + + if comment.startswith(delimiter): + comment = 
comment[len(delimiter) :] + + if comment.endswith("*/"): + comment = comment[:-2] if comment != "": result.append(comment.strip()) From 65f88bc3e7466b09108f4c0504c485d27e164558 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Jul 2025 17:03:05 +0200 Subject: [PATCH 05/24] various changes --- hcl2/parser.py | 4 +- hcl2/rule_transformer/editor.py | 77 ++++++ hcl2/rule_transformer/hcl2.lark | 166 +++++++++++ hcl2/rule_transformer/processor.py | 258 ++++++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 93 ++++--- hcl2/rule_transformer/rules/base.py | 53 ++-- hcl2/rule_transformer/rules/containers.py | 85 ++++++ hcl2/rule_transformer/rules/expression.py | 102 +++---- hcl2/rule_transformer/rules/indexing.py | 75 +++++ hcl2/rule_transformer/rules/literal_rules.py | 47 ++++ hcl2/rule_transformer/rules/strings.py | 50 ++++ hcl2/rule_transformer/rules/token_sequence.py | 116 ++++---- hcl2/rule_transformer/rules/tokens.py | 66 +++++ hcl2/rule_transformer/rules/tree.py | 106 +++++++ hcl2/rule_transformer/rules/whitespace.py | 46 +++- hcl2/rule_transformer/transformer.py | 103 +++++-- hcl2/rule_transformer/utils.py | 8 +- 17 files changed, 1232 insertions(+), 223 deletions(-) create mode 100644 hcl2/rule_transformer/editor.py create mode 100644 hcl2/rule_transformer/hcl2.lark create mode 100644 hcl2/rule_transformer/processor.py create mode 100644 hcl2/rule_transformer/rules/containers.py create mode 100644 hcl2/rule_transformer/rules/indexing.py create mode 100644 hcl2/rule_transformer/rules/literal_rules.py create mode 100644 hcl2/rule_transformer/rules/strings.py create mode 100644 hcl2/rule_transformer/rules/tokens.py create mode 100644 hcl2/rule_transformer/rules/tree.py diff --git a/hcl2/parser.py b/hcl2/parser.py index 79d50122..a0c87e34 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "hcl2.lark", + 
"rule_transformer/hcl2.lark.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, @@ -29,7 +29,7 @@ def reconstruction_parser() -> Lark: if necessary. """ return Lark.open( - "hcl2.lark", + "rule_transformer/hcl2.lark", parser="lalr", # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: # diff --git a/hcl2/rule_transformer/editor.py b/hcl2/rule_transformer/editor.py new file mode 100644 index 00000000..9efce08f --- /dev/null +++ b/hcl2/rule_transformer/editor.py @@ -0,0 +1,77 @@ +import dataclasses +from copy import copy, deepcopy +from typing import List, Optional, Set, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.base import BlockRule, StartRule + + +@dataclasses.dataclass +class TreePathElement: + + name: str + index: int = 0 + + +@dataclasses.dataclass +class TreePath: + + elements: List[TreePathElement] = dataclasses.field(default_factory=list) + + @classmethod + def build(cls, elements: List[Tuple[str, Optional[int]] | str]): + results = [] + for element in elements: + if isinstance(element, tuple): + if len(element) == 1: + result = TreePathElement(element[0], 0) + else: + result = TreePathElement(*element) + else: + result = TreePathElement(element, 0) + + results.append(result) + + return cls(results) + + def __iter__(self): + return self.elements.__iter__() + + def __len__(self): + return self.elements.__len__() + + +class Editor: + def __init__(self, rules_tree: LarkRule): + self.rules_tree = rules_tree + + @classmethod + def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: + return cls._find_all(rules_tree, path_element.name)[path_element.index] + + @classmethod + def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: + children = [] + print("rule", rules_tree) + print("rule children", rules_tree.children) + for child in rules_tree.children: + 
if isinstance(child, LarkRule) and child.lark_name() == rule_name: + children.append(child) + + return children + + def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: + path = deepcopy(path.elements) + + current_rule = self.rules_tree + while len(path) > 0: + current_path, *path = path + print(current_path, path) + current_rule = self._find_one(current_rule, current_path) + + return self._find_all(current_rule, rule_name) + + # def visit(self, path: TreePath) -> "Editor": + # + # while len(path) > 1: + # current = diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark new file mode 100644 index 00000000..a7722118 --- /dev/null +++ b/hcl2/rule_transformer/hcl2.lark @@ -0,0 +1,166 @@ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments +NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ + +// Keywords +IF : "if" +IN : "in" +FOR : "for" +FOR_EACH : "for_each" + +// Identifiers and Names +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH + +// Literals +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) + +// Operators +BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +DOUBLE_EQ : "==" +NEQ : "!=" +LT : "<" +GT : ">" +LEQ : "<=" +GEQ : ">=" +MINUS : "-" +ASTERISK : "*" +SLASH : "/" +PERCENT : "%" +DOUBLE_AMP : "&&" +DOUBLE_PIPE : "||" +PLUS : "+" +NOT : "!" +QMARK : "?" 
+ +// Punctuation +LPAR : "(" +RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" +COMMA : "," +DOT : "." +EQ : /[ \t]*=(?!=|>)/ +COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier: IDENTIFIER +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : expr_term | operation | conditional +interpolation: INTERP_START expression RBRACE +conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression + +// Operations +?operation : unary_op | binary_op +!unary_op : (MINUS | NOT) expr_term +binary_op : expression binary_term new_line_or_comment? +binary_term : binary_operator new_line_or_comment? expression +!binary_operator : BINARY_OP + +// Expression terms +expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR + | float_lit + | int_lit + | string + | tuple + | object + | function_call + | index_expr_term + | get_attr_expr_term + | identifier + | provider_function_call + | heredoc_template + | heredoc_template_trim + | attr_splat_expr_term + | full_splat_expr_term + | for_tuple_expr + | for_object_expr + +// Collections +tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB +object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +object_elem : object_elem_key ( EQ | COLON ) expression +object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression +object_elem_key_expression : LPAR expression RPAR +object_elem_key_dot_accessor : identifier (DOT identifier)+ + +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM + +// Functions +function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) +provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR + +// Indexing and attribute access +index_expr_term : expr_term index +get_attr_expr_term : expr_term get_attr +attr_splat_expr_term : expr_term attr_splat +full_splat_expr_term : expr_term full_splat +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT get_attr* +full_splat : FULL_SPLAT_START (get_attr | index)* + +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? 
RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/rule_transformer/processor.py b/hcl2/rule_transformer/processor.py new file mode 100644 index 00000000..b854aff5 --- /dev/null +++ b/hcl2/rule_transformer/processor.py @@ -0,0 +1,258 @@ +from copy import copy, deepcopy +from typing import ( + List, + Optional, + Union, + Callable, + Any, + Tuple, + Generic, + TypeVar, + cast, + Generator, +) + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + +T = TypeVar("T", bound=LarkRule) + + +class RulesProcessor(Generic[T]): + """""" + + @classmethod + def _traverse( + cls, + node: T, + predicate: Callable[[T], bool], + current_depth: int = 0, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor"]: + + results = [] + + if predicate(node): + results.append(cls(node)) + + if max_depth is not None and current_depth >= max_depth: + return results + + for child in node.children: + if child is None or not isinstance(child, LarkRule): + continue + + child_results = cls._traverse( + child, + predicate, + current_depth + 1, + max_depth, + ) + results.extend(child_results) + + return results + + def __init__(self, node: LarkRule): + self.node = node + + @property + def siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children + + @property + def next_siblings(self): + if self.node.parent is None: + return None + return 
self.node.parent.children[self.node.index + 1 :] + + @property + def previous_siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children[: self.node.index - 1] + + def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: + child_processors = [self.__class__(child) for child in self.node.children] + yield self, child_processors + for processor in child_processors: + if isinstance(processor.node, LarkRule): + for result in processor.walk(): + yield result + + def find_block( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> "RulesProcessor[BlockRule]": + return self.find_blocks(labels, exact_match, max_depth)[0] + + def find_blocks( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor[BlockRule]"]: + """ + Find blocks by their labels. + + Args: + labels: List of label strings to match + exact_match: If True, all labels must match exactly. If False, labels can be a subset. + max_depth: Maximum depth to search + + Returns: + ... + """ + + def block_predicate(node: LarkRule) -> bool: + if not isinstance(node, BlockRule): + return False + + node_labels = [label.serialize() for label in node.labels] + + if exact_match: + return node_labels == labels + else: + # Check if labels is a prefix of node_labels + if len(labels) > len(node_labels): + return False + return node_labels[: len(labels)] == labels + + return cast( + List[RulesProcessor[BlockRule]], + self._traverse(self.node, block_predicate, max_depth=max_depth), + ) + + def attribute( + self, name: str, max_depth: Optional[int] = None + ) -> "RulesProcessor[AttributeRule]": + return self.find_attributes(name, max_depth)[0] + + def find_attributes( + self, name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor[AttributeRule]"]: + """ + Find attributes by their identifier name. 
+ + Args: + name: Attribute name to search for + max_depth: Maximum depth to search + + Returns: + List of RulesProcessor objects wrapping matching attributes + """ + + def attribute_predicate(node: LarkRule) -> bool: + if not isinstance(node, AttributeRule): + return False + return node.identifier.serialize() == name + + return self._traverse(self.node, attribute_predicate, max_depth=max_depth) + + def rule(self, rule_name: str, max_depth: Optional[int] = None): + return self.find_rules(rule_name, max_depth)[0] + + def find_rules( + self, rule_name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules of a specific type. + + Args: + rule_name: Name of the rule type to find + max_depth: Maximum depth to search + + Returns: + List of RulesProcessor objects wrapping matching rules + """ + + def rule_predicate(node: LarkRule) -> bool: + return node.lark_name() == rule_name + + return self._traverse(self.node, rule_predicate, max_depth=max_depth) + + def find_by_predicate( + self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules matching a custom predicate. 
+ + Args: + predicate: Function that returns True for nodes to collect + max_depth: Maximum depth to search + + Returns: + List of RulesProcessor objects wrapping matching rules + """ + return self._traverse(self.node, predicate, max_depth) + + # Convenience methods + def get_all_blocks(self, max_depth: Optional[int] = None) -> List: + """Get all blocks in the tree.""" + return self.find_rules("block", max_depth) + + def get_all_attributes( + self, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """Get all attributes in the tree.""" + return self.find_rules("attribute", max_depth) + + def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the previous sibling node.""" + if self.node.parent is None: + return None + + for sibling in reversed(self.previous_siblings): + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the next sibling node.""" + if self.node.parent is None: + return None + + for sibling in self.next_siblings: + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def append_child( + self, new_node: LarkRule, indentation: bool = True + ) -> "RulesProcessor": + children = self.node.children + if indentation: + if isinstance(children[-1], NewLineOrCommentRule): + children.pop() + children.append(NewLineOrCommentRule.from_string("\n ")) + + new_node = deepcopy(new_node) + new_node.set_parent(self.node) + new_node.set_index(len(children)) + children.append(new_node) + return self.__class__(new_node) + + def replace(self, new_node: LarkRule) -> "RulesProcessor": + new_node = deepcopy(new_node) + + self.node.parent.children.pop(self.node.index) + self.node.parent.children.insert(self.node.index, new_node) + 
new_node.set_parent(self.node.parent) + new_node.set_index(self.node.index) + return self.__class__(new_node) + + # def insert_before(self, new_node: LarkRule) -> bool: + # """Insert a new node before this one.""" + # if self.parent is None or self.parent_index < 0: + # return False + # + # try: + # self.parent.children.insert(self.parent_index, new_node) + # except (IndexError, AttributeError): + # return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 6c650ea3..d3a3b634 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional +from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree from lark.tree import Meta @@ -8,8 +8,23 @@ class LarkElement(ABC): + @property + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + def __init__(self, index: int = -1, parent: "LarkElement" = None): + self._index = index + self._parent = parent + + def set_index(self, i: int): + self._index = i + + def set_parent(self, node: "LarkElement"): + self._parent = node + @abstractmethod - def reverse(self) -> Any: + def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod @@ -17,53 +32,42 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A raise NotImplementedError() -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name +class LarkToken(LarkElement, ABC): + def __init__(self, value: Union[str, int]): self._value = value + super().__init__() @property - def name(self) -> str: - return self._name + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def serialize_conversion(self) -> Callable: + raise NotImplementedError() @property def value(self): return self._value - def 
serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self._value + def serialize(self, options: SerializationOptions = SerializationOptions()): + return self.serialize_conversion(self.value) - def reverse(self) -> Token: - return Token(self.name, self.value) + def to_lark(self) -> Token: + return Token(self.lark_name, self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken -COLON_TOKEN = LarkToken -LPAR_TOKEN = LarkToken # left parenthesis -RPAR_TOKEN = LarkToken # right parenthesis - - -class TokenSequence(LarkElement): - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def reverse(self) -> List[Token]: - return [token.reverse() for token in self.tokens] - - def serialize(self, options: SerializationOptions = SerializationOptions()): - return "".join(str(token) for token in self.tokens) + return f"" class LarkRule(LarkElement, ABC): - @staticmethod + @property @abstractmethod - def rule_name() -> str: + def lark_name(self) -> str: raise NotImplementedError() @abstractmethod @@ -74,22 +78,33 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A def children(self) -> List[LarkElement]: return self._children - def reverse(self) -> Tree: + @property + def parent(self): + return self._parent + + @property + def index(self): + return self._index + + def to_lark(self) -> Tree: result_children = [] for child in self._children: if child is None: continue - if isinstance(child, TokenSequence): - result_children.extend(child.reverse()) - else: - result_children.append(child.reverse()) + result_children.append(child.to_lark()) - return Tree(self.rule_name(), result_children, meta=self._meta) + return Tree(self.lark_name, result_children, meta=self._meta) - def __init__(self, children: List, meta: Optional[Meta] = None): + def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): + 
super().__init__() self._children = children self._meta = meta + for index, child in enumerate(children): + if child is not None: + child.set_index(index) + child.set_parent(self) + def __repr__(self): - return f"" + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 76d014e9..6d0c4924 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,9 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.token_sequence import IdentifierRule +from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions @@ -13,17 +13,17 @@ class AttributeRule(LarkRule): _children: Tuple[ - IdentifierRule, - EQ_Token, + IdentifierToken, + EQ_TOKEN, Expression, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "attribute" @property - def identifier(self) -> IdentifierRule: + def identifier(self) -> IdentifierToken: return self._children[0] @property @@ -39,13 +39,13 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - AttributeRule, + # AttributeRule, "BlockRule", ] ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "body" def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: @@ -71,13 +71,7 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A result = {} for attribute in attributes: - result.update( - { - attribute.identifier.serialize( - options - ): attribute.expression.serialize(options) - } - ) + result.update(attribute.serialize(options)) result_blocks = defaultdict(list) for 
block in blocks: @@ -101,14 +95,14 @@ class StartRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "start" - @property def body(self) -> BodyRule: return self._children[0] + @property + def lark_name(self) -> str: + return "start" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self.body.serialize(options) @@ -117,16 +111,19 @@ class BlockRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "block" - def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - *self._labels, self._body = children + + *self._labels, self._body = [ + child for child in children if not isinstance(child, LarkToken) + ] + + @property + def lark_name(self) -> str: + return "block" @property - def labels(self) -> List[IdentifierRule]: + def labels(self) -> List[IdentifierToken]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -138,6 +135,6 @@ def serialize( ) -> BodyRule: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels[1:]): + for label in reversed(labels): result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py new file mode 100644 index 00000000..c39f3ba2 --- /dev/null +++ b/hcl2/rule_transformer/rules/containers.py @@ -0,0 +1,85 @@ +from typing import Tuple, List, Optional, Union, Any + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.literal_rules import ( + FloatLitRule, + IntLitRule, + IdentifierRule, +) +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.tokens import ( + COLON_TOKEN, + EQ_TOKEN, + LBRACE_TOKEN, + COMMA_TOKEN, + RBRACE_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + 
NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class ObjectElemKeyRule(LarkRule): + _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + @staticmethod + def lark_name() -> str: + return "object_elem_key" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.children[0].serialize(options) + + +class ObjectElemRule(LarkRule): + + _children: Tuple[ + ObjectElemKeyRule, + Union[EQ_TOKEN, COLON_TOKEN], + Expression, + ] + + @staticmethod + def lark_name() -> str: + return "object_elem" + + @property + def key(self) -> ObjectElemKeyRule: + return self.children[0] + + @property + def expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return { + self.children[0].serialize(options): self.children[2].serialize(options) + } + + +class ObjectRule(InlineCommentMixIn): + + _children: Tuple[ + LBRACE_TOKEN, + Optional[NewLineOrCommentRule], + Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], + RBRACE_TOKEN, + ] + + @staticmethod + def lark_name() -> str: + return "object" + + @property + def elements(self) -> List[ObjectElemRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) + ] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = {} + for element in self.elements: + result.update(element.serialize()) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 16daf310..8a03f813 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -1,17 +1,18 @@ from abc import ABC -from typing import Any, Tuple, Optional, List +from copy import deepcopy +from typing import Any, Tuple, Optional -from lark import Tree, Token from lark.tree 
import Meta from hcl2.rule_transformer.rules.abstract import ( - LarkRule, LarkToken, - LPAR_TOKEN, - RPAR_TOKEN, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule +from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, @@ -20,36 +21,14 @@ ) -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: +class Expression(InlineCommentMixIn, ABC): + @property + def lark_name(self) -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - def inline_comments(self): - result = [] - for child in self._children: - - if isinstance(child, NewLineOrCommentRule): - result.extend(child.to_list()) - - elif isinstance(child, Expression): - result.extend(child.inline_comments()) - - return result - - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): - for index in indexes: - try: - child = children[index] - except IndexError: - children.insert(index, None) - else: - if not isinstance(child, NewLineOrCommentRule): - children.insert(index, None) - class ExprTermRule(Expression): @@ -63,17 +42,17 @@ class ExprTermRule(Expression): _children: type_ - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].name == "LPAR" + and children[0].lark_name == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" + and children[-1].lark_name == "RPAR" ): self._parentheses = 
True else: @@ -90,11 +69,14 @@ def parentheses(self) -> bool: def expression(self) -> Expression: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) + + if options.unwrap_dollar_string: + result = unwrap_dollar_string(result) return result @@ -102,19 +84,21 @@ class ConditionalRule(Expression): _children: Tuple[ Expression, + QMARK_TOKEN, Optional[NewLineOrCommentRule], Expression, Optional[NewLineOrCommentRule], + COLON_TOKEN, Optional[NewLineOrCommentRule], Expression, ] - @staticmethod - def rule_name(): + @property + def lark_name(self) -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 4]) + self._possibly_insert_null_comments(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -123,13 +107,15 @@ def condition(self) -> Expression: @property def if_true(self) -> Expression: - return self._children[2] + return self._children[3] @property def if_false(self) -> Expression: - return self._children[5] + return self._children[7] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + options = options.replace(unwrap_dollar_string=True) + print(self.condition) result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) @@ -142,8 +128,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -166,11 +152,11 @@ class BinaryOpRule(Expression): _children: Tuple[ ExprTermRule, BinaryTermRule, - NewLineOrCommentRule, + Optional[NewLineOrCommentRule], ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_op" @property @@ -182,23 +168,23 @@ def binary_term(self) -> BinaryTermRule: return self._children[1] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - lhs = self.expr_term.serialize(options) - operator = self.binary_term.binary_operator.serialize(options) - rhs = self.binary_term.expr_term.serialize(options) - # below line is to avoid dollar string nested inside another dollar string, e.g.: - # hcl2: 15 + (10 * 12) - # desired json: "${15 + (10 * 12)}" - # undesired json: "${15 + ${(10 * 12)}}" - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") + children_options = options.replace(unwrap_dollar_string=True) + lhs = self.expr_term.serialize(children_options) + operator = self.binary_term.binary_operator.serialize(children_options) + rhs = self.binary_term.expr_term.serialize(children_options) + + result = f"{lhs} {operator} {rhs}" + if options.unwrap_dollar_string: + return result + return to_dollar_string(result) class UnaryOpRule(Expression): _children: Tuple[LarkToken, ExprTermRule] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "unary_op" @property diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py new file mode 100644 index 00000000..ce23d040 --- /dev/null +++ b/hcl2/rule_transformer/rules/indexing.py @@ -0,0 +1,75 
@@ +from typing import List, Optional, Tuple, Any + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.tokens import ( + DOT_TOKEN, + IntToken, + LSQB_TOKEN, + RSQB_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string + + +class ShortIndexRule(LarkRule): + + _children: Tuple[ + DOT_TOKEN, + IntToken, + ] + + @property + def lark_name(self) -> str: + return "short_index" + + @property + def index(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f".{self.index.serialize(options)}" + + +class SqbIndex(InlineCommentMixIn): + _children: Tuple[ + LSQB_TOKEN, + Optional[NewLineOrCommentRule], + ExprTermRule, + Optional[NewLineOrCommentRule], + RSQB_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "braces_index" + + @property + def index_expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"[{self.index_expression.serialize(options)}]" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3]) + super().__init__(children, meta) + + +class IndexExprTermRule(Expression): + + _children: Tuple[ExprTermRule, SqbIndex] + + @property + def lark_name(self) -> str: + return "index_expr_term" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string( + f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + ) diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py new file mode 100644 index 00000000..06ca99ae --- /dev/null +++ 
b/hcl2/rule_transformer/rules/literal_rules.py @@ -0,0 +1,47 @@ +from abc import ABC +from typing import Any, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions + + +class TokenRule(LarkRule, ABC): + + _children: Tuple[LarkToken] + + @property + def token(self) -> LarkToken: + return self._children[0] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.token.serialize() + + +class IdentifierRule(TokenRule): + @property + def lark_name(self) -> str: + return "identifier" + + +class IntLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "int_lit" + + +class FloatLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "float_lit" + + +class StringPartRule(TokenRule): + @property + def lark_name(self) -> str: + return "string" + + +class BinaryOperatorRule(TokenRule): + @property + def lark_name(self) -> str: + return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py new file mode 100644 index 00000000..0f53c55a --- /dev/null +++ b/hcl2/rule_transformer/rules/strings.py @@ -0,0 +1,50 @@ +from typing import Tuple, Optional, List, Any, Union + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.tokens import ( + INTERP_START_TOKEN, + RBRACE_TOKEN, + DBLQUOTE_TOKEN, + STRING_CHARS_TOKEN, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + + @property + def lark_name(self) -> str: + return "string" + + @property + def string_parts(self): + return self.children[1:-1] + + def serialize(self, options: 
SerializationOptions = SerializationOptions()) -> Any: + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class InterpolationRule(LarkRule): + + _children: Tuple[ + INTERP_START_TOKEN, + Expression, + RBRACE_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "interpolation" + + @property + def expression(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return "${" + self.expression.serialize(options) + "}" diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 174e2510..66d780b3 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,60 +1,56 @@ -from abc import ABC -from typing import Tuple, Any, List, Optional, Type - -from lark.tree import Meta - -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions - - -class TokenSequenceRule(LarkRule, ABC): - - _children: Tuple[TokenSequence] - - def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): - children = [TokenSequence(children)] - super().__init__(children, meta) - - def serialized_type(self) -> Type: - return str - - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.serialized_type()(self._children[0].serialize(options)) - - -class IdentifierRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "identifier" - - -class IntLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "int_lit" - - def serialized_type(self) -> Type: - return int - - -class FloatLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "float_lit" - - def serialized_type(self) -> Type: - return float - - -class StringLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: 
- # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; - # nevertheless, try to change it to a rule in hcl2.lark - return "STRING_LIT" - - -class BinaryOperatorRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "binary_operator" +# from abc import ABC +# from typing import Tuple, Any, List, Optional, Type +# +# from lark.tree import Meta +# +# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +# from hcl2.rule_transformer.utils import SerializationOptions +# +# +# class TokenSequenceRule(LarkRule, ABC): +# +# _children: Tuple[TokenSequence] +# +# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): +# children = [TokenSequence(children)] +# super().__init__(children, meta) +# +# def serialized_type(self) -> Type: +# return str +# +# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: +# return self.serialized_type()(self._children[0].serialize(options)) +# +# +# class IdentifierRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "identifier" +# +# +# class IntLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "int_lit" +# +# def serialized_type(self) -> Type: +# return int +# +# +# class FloatLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "float_lit" +# +# def serialized_type(self) -> Type: +# return float +# +# +# class StringLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; +# # nevertheless, try to change it to a rule in the grammar +# return "STRING_LIT" +# +# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py new file mode 100644 index 00000000..18e1ac07 --- /dev/null +++ b/hcl2/rule_transformer/rules/tokens.py @@ -0,0 +1,66 @@ +from typing import Callable, Any + +from 
hcl2.rule_transformer.rules.abstract import LarkToken + + +class StringToken(LarkToken): + def __init__(self, name: str, value: Any): + super().__init__(value) + self._name = name + + @property + def lark_name(self) -> str: + return self._name + + @property + def serialize_conversion(self) -> Callable: + return str + + +# explicitly define various kinds of string-based tokens +STRING_CHARS_TOKEN = StringToken +ESCAPED_INTERPOLATION_TOKEN = StringToken +BINARY_OP_TOKEN = StringToken +EQ_TOKEN = StringToken +COLON_TOKEN = StringToken +LPAR_TOKEN = StringToken # ( +RPAR_TOKEN = StringToken # ) +LBRACE_TOKEN = StringToken # { +RBRACE_TOKEN = StringToken # } +DOT_TOKEN = StringToken +COMMA_TOKEN = StringToken +QMARK_TOKEN = StringToken +LSQB_TOKEN = StringToken # [ +RSQB_TOKEN = StringToken # ] +INTERP_START_TOKEN = StringToken # ${ +DBLQUOTE_TOKEN = StringToken # " + + +class IdentifierToken(LarkToken): + @property + def lark_name(self) -> str: + return "IDENTIFIER" + + @property + def serialize_conversion(self) -> Callable: + return str + + +class IntToken(LarkToken): + @property + def lark_name(self) -> str: + return "INT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return int + + +class FloatToken(LarkToken): + @property + def lark_name(self) -> str: + return "FLOAT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return float diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rule_transformer/rules/tree.py new file mode 100644 index 00000000..e39d2077 --- /dev/null +++ b/hcl2/rule_transformer/rules/tree.py @@ -0,0 +1,106 @@ +from abc import ABC, abstractmethod +from typing import List, Optional, Any, Union + + +class LarkNode(ABC): + """Base class for all nodes in the tree""" + + def __init__(self, index: int = -1, parent: Optional["Node"] = None): + self._index = index + self._parent = parent + + @property + def parent(self) -> Optional["Node"]: + return self._parent + + @property + def index(self) -> int: + 
return self._index + + def set_parent(self, parent: "Node"): + self._parent = parent + + def set_index(self, index: int): + self._index = index + + @abstractmethod + def serialize(self, options=None) -> Any: + pass + + @abstractmethod + def to_lark(self) -> Any: + """Convert back to Lark representation""" + pass + + def is_leaf(self) -> bool: + """Check if this is a leaf node (atomic token)""" + return isinstance(self, LeafNode) + + def is_sequence(self) -> bool: + """Check if this is a token sequence node""" + return isinstance(self, SequenceNode) + + def is_internal(self) -> bool: + """Check if this is an internal node (grammar rule)""" + return isinstance(self, InternalNode) + + def is_atomic(self) -> bool: + """Check if this represents an atomic value (leaf or sequence)""" + return self.is_leaf() or self.is_sequence() + + +class LarkLeaf(Node, ABC): + """""" + + def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): + super().__init__(index, parent) + self._value = value + + @property + def value(self) -> Any: + return self._value + + def serialize(self, options=None) -> Any: + return self._value + + +class InternalNode(Node): + def __init__( + self, children: List[Node], index: int = -1, parent: Optional[Node] = None + ): + super().__init__(index, parent) + self._children = children or [] + + # Set parent and index for all children + for i, child in enumerate(self._children): + if child is not None: + child.set_parent(self) + child.set_index(i) + + @property + def children(self) -> List[Node]: + return self._children + + def add_child(self, child: Node): + """Add a child to this internal node""" + child.set_parent(self) + child.set_index(len(self._children)) + self._children.append(child) + + def remove_child(self, index: int) -> Optional[Node]: + """Remove child at given index""" + if 0 <= index < len(self._children): + child = self._children.pop(index) + if child: + child.set_parent(None) + # Update indices for remaining children 
+ for i in range(index, len(self._children)): + if self._children[i]: + self._children[i].set_index(i) + return child + return None + + @abstractmethod + def rule_name(self) -> str: + """The name of the grammar rule this represents""" + pass diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 96fe7c91..65d5dd9c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,19 +1,19 @@ -from typing import Optional, List, Any +from abc import ABC +from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.literal_rules import TokenRule from hcl2.rule_transformer.utils import SerializationOptions -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: +class NewLineOrCommentRule(TokenRule): + @property + def lark_name(self) -> str: return "new_line_or_comment" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return TokenSequence(self._children).serialize(options) + @classmethod + def from_string(cls, string: str) -> "NewLineOrCommentRule": + return cls([LarkToken("NL_OR_COMMENT", string)]) def to_list( self, options: SerializationOptions = SerializationOptions() @@ -40,3 +40,29 @@ def to_list( result.append(comment.strip()) return result + + +class InlineCommentMixIn(LarkRule, ABC): + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + comments = 
child.to_list() + if comments is not None: + result.extend(comments) + + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1c7d6157..31e88d61 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,30 +1,45 @@ # pylint: disable=missing-function-docstring,unused-argument from typing import List, Union -from lark import Transformer, Tree, Token +from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from lark.visitors import _Leaf_T, Discard, v_args -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) +from hcl2.rule_transformer.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, +) from hcl2.rule_transformer.rules.expression import ( BinaryTermRule, - ConditionalRule, - ExprTermRule, - BinaryOpRule, UnaryOpRule, + BinaryOpRule, + ExprTermRule, + ConditionalRule, ) -from hcl2.rule_transformer.rules.token_sequence import ( - IdentifierRule, - IntLitRule, +from hcl2.rule_transformer.rules.indexing import ( + IndexExprTermRule, + SqbIndex, + ShortIndexRule, +) +from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, - StringLitRule, + IntLitRule, + IdentifierRule, BinaryOperatorRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule +from hcl2.rule_transformer.rules.tokens import ( + IdentifierToken, + StringToken, + IntToken, + FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -36,18 +51,24 @@ class RuleTransformer(Transformer): with_meta: bool - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} + def transform(self, tree: Tree) -> StartRule: + return 
super().transform(tree) - def __init__(self): + def __init__(self, discard_new_line_or_comments: bool = False): super().__init__() + self.discard_new_line_or_comments = discard_new_line_or_comments - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) + def __default_token__(self, token: Token) -> StringToken: + return StringToken(token.type, token.value) + + def IDENTIFIER(self, token: Token) -> IdentifierToken: + return IdentifierToken(token.value) - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) + def INT_LITERAL(self, token: Token) -> IntToken: + return IntToken(token.value) + + def FLOAT_LITERAL(self, token: Token) -> FloatToken: + return FloatToken(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -61,6 +82,16 @@ def body(self, meta: Meta, args) -> BodyRule: def block(self, meta: Meta, args) -> BlockRule: return BlockRule(args, meta) + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self.discard_new_line_or_comments: + return Discard + return NewLineOrCommentRule(args, meta) + @v_args(meta=True) def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) @@ -74,8 +105,16 @@ def float_lit(self, meta: Meta, args) -> FloatLitRule: return FloatLitRule(args, meta) @v_args(meta=True) - def string_lit(self, meta: Meta, args) -> StringLitRule: - return StringLitRule(args, meta) + def string(self, meta: Meta, args) -> StringRule: + return StringRule(args, meta) + + @v_args(meta=True) + def string_part(self, meta: Meta, args) -> StringPartRule: + return StringPartRule(args, meta) + + @v_args(meta=True) + def interpolation(self, meta: Meta, args) -> InterpolationRule: + return InterpolationRule(args, meta) @v_args(meta=True) def expr_term(self, meta: Meta, 
args) -> ExprTermRule: @@ -102,9 +141,25 @@ def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) @v_args(meta=True) - def attribute(self, meta: Meta, args) -> AttributeRule: - return AttributeRule(args, meta) + def object(self, meta: Meta, args) -> ObjectRule: + return ObjectRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - return NewLineOrCommentRule(args, meta) + def object_elem(self, meta: Meta, args) -> ObjectElemRule: + return ObjectElemRule(args, meta) + + @v_args(meta=True) + def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: + return ObjectElemKeyRule(args, meta) + + @v_args(meta=True) + def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: + return IndexExprTermRule(args, meta) + + @v_args(meta=True) + def braces_index(self, meta: Meta, args) -> SqbIndex: + return SqbIndex(args, meta) + + @v_args(meta=True) + def short_index(self, meta: Meta, args) -> ShortIndexRule: + return ShortIndexRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index e083d628..6a6ed661 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,11 +1,15 @@ -from dataclasses import dataclass +from dataclasses import dataclass, replace @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - + unwrap_dollar_string: bool = False + + def replace(self, **kwargs) -> "SerializationOptions": + return replace(self, **kwargs) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From 5a10fece33cf401c4e2b23a1655e983c3c708e55 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 23 Jul 2025 11:48:44 +0200 Subject: [PATCH 06/24] batch of changes --- hcl2/parser.py | 2 +- hcl2/reconstructor.py | 7 +- hcl2/rule_transformer/deserializer.py | 31 +++ hcl2/rule_transformer/hcl2.lark | 25 +- hcl2/rule_transformer/rules/abstract.py | 36 ++- 
hcl2/rule_transformer/rules/base.py | 61 +++-- hcl2/rule_transformer/rules/containers.py | 165 ++++++++++++-- .../rules/{expression.py => expressions.py} | 135 ++++++----- hcl2/rule_transformer/rules/functions.py | 104 +++++++++ hcl2/rule_transformer/rules/indexing.py | 215 ++++++++++++++++-- hcl2/rule_transformer/rules/literal_rules.py | 34 +-- hcl2/rule_transformer/rules/strings.py | 42 ++-- hcl2/rule_transformer/rules/token_sequence.py | 56 ----- hcl2/rule_transformer/rules/tokens.py | 99 ++++---- hcl2/rule_transformer/rules/whitespace.py | 4 +- hcl2/rule_transformer/transformer.py | 90 ++++++-- hcl2/rule_transformer/utils.py | 41 +++- 17 files changed, 835 insertions(+), 312 deletions(-) create mode 100644 hcl2/rule_transformer/deserializer.py rename hcl2/rule_transformer/rules/{expression.py => expressions.py} (53%) create mode 100644 hcl2/rule_transformer/rules/functions.py delete mode 100644 hcl2/rule_transformer/rules/token_sequence.py diff --git a/hcl2/parser.py b/hcl2/parser.py index a0c87e34..3e524736 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark.lark", + "rule_transformer/hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 7f957d7b..555edcf6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -167,12 +167,17 @@ def _should_add_space(self, rule, current_terminal, is_block_label: bool = False if self._is_equals_sign(current_terminal): return True + if is_block_label: + pass + # print(rule, self._last_rule, current_terminal, self._last_terminal) + if is_block_label and isinstance(rule, Token) and rule.value == "string": if ( current_terminal == self._last_terminal == Terminal("DBLQUOTE") or current_terminal == Terminal("DBLQUOTE") - and 
self._last_terminal == Terminal("NAME") + and self._last_terminal == Terminal("IDENTIFIER") ): + # print("true") return True # if we're in a ternary or binary operator, add space around the operator diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py new file mode 100644 index 00000000..5bdcf775 --- /dev/null +++ b/hcl2/rule_transformer/deserializer.py @@ -0,0 +1,31 @@ +import json +from typing import Any, TextIO, List + +from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.utils import DeserializationOptions + + +class Deserializer: + def __init__(self, options=DeserializationOptions()): + self.options = options + + def load_python(self, value: Any) -> LarkElement: + pass + + def loads(self, value: str) -> LarkElement: + return self.load_python(json.loads(value)) + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + def _deserialize(self, value: Any) -> LarkElement: + pass + + def _deserialize_dict(self, value: dict) -> LarkRule: + pass + + def _deserialize_list(self, value: List) -> LarkRule: + pass + + def _deserialize_expression(self, value: str) -> LarkRule: + pass diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index a7722118..3f8d913e 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -11,11 +11,9 @@ IN : "in" FOR : "for" FOR_EACH : "for_each" -// Identifiers and Names -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH // Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ DECIMAL : "0".."9" @@ -91,7 +89,8 @@ block : identifier (identifier | string)* new_line_or_comment? 
LBRACE body RBRAC new_line_or_comment: ( NL_OR_COMMENT )+ // Basic literals and identifiers -identifier: IDENTIFIER +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH int_lit: INT_LITERAL float_lit: FLOAT_LITERAL string: DBLQUOTE string_part* DBLQUOTE @@ -118,21 +117,20 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr // Collections -tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB -object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR @@ -143,9 +141,8 @@ heredoc_template : HEREDOC_TEMPLATE heredoc_template_trim : HEREDOC_TEMPLATE_TRIM // Functions -function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) -provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +function_call : identifier (COLONS identifier COLONS identifier)? 
LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) // Indexing and attribute access index_expr_term : expr_term index @@ -156,7 +153,7 @@ full_splat_expr_term : expr_term full_splat braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB short_index : DOT INT_LITERAL get_attr : DOT identifier -attr_splat : ATTR_SPLAT get_attr* +attr_splat : ATTR_SPLAT (get_attr | index)* full_splat : FULL_SPLAT_START (get_attr | index)* // For expressions diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index d3a3b634..e32d9ddb 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -2,15 +2,16 @@ from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree +from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class LarkElement(ABC): - @property + @staticmethod @abstractmethod - def lark_name(self) -> str: + def lark_name() -> str: raise NotImplementedError() def __init__(self, index: int = -1, parent: "LarkElement" = None): @@ -28,7 +29,9 @@ def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @@ -37,11 +40,6 @@ def __init__(self, value: Union[str, int]): self._value = value super().__init__() - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @property @abstractmethod def serialize_conversion(self) -> Callable: @@ -51,27 +49,26 @@ def serialize_conversion(self) -> Callable: 
def value(self): return self._value - def serialize(self, options: SerializationOptions = SerializationOptions()): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.serialize_conversion(self.value) def to_lark(self) -> Token: - return Token(self.lark_name, self.value) + return Token(self.lark_name(), self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" + return f"" class LarkRule(LarkElement, ABC): - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @property @@ -94,7 +91,7 @@ def to_lark(self) -> Tree: result_children.append(child.to_lark()) - return Tree(self.lark_name, result_children, meta=self._meta) + return Tree(self.lark_name(), result_children, meta=self._meta) def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() @@ -103,6 +100,7 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for index, child in enumerate(children): if child is not None: + print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 6d0c4924..da74954b 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,34 +3,37 @@ from lark.tree import Meta +from hcl2.dict_transformer import START_LINE, END_LINE from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.tokens 
import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): _children: Tuple[ - IdentifierToken, - EQ_TOKEN, - Expression, + NAME, + EQ, + ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "attribute" @property - def identifier(self) -> IdentifierToken: + def identifier(self) -> NAME: return self._children[0] @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return {self.identifier.serialize(options): self.expression.serialize(options)} @@ -44,11 +47,13 @@ class BodyRule(LarkRule): ] ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "body" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] @@ -99,11 +104,13 @@ class StartRule(LarkRule): def body(self) -> BodyRule: return self._children[0] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "start" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.body.serialize(options) @@ -118,12 +125,12 @@ def __init__(self, children, meta: Optional[Meta] = None): child for child in children if not isinstance(child, LarkToken) ] - @property - def lark_name(self) -> str: + @staticmethod + 
def lark_name() -> str: return "block" @property - def labels(self) -> List[IdentifierToken]: + def labels(self) -> List[NAME]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -131,10 +138,18 @@ def body(self) -> BodyRule: return self._body def serialize( - self, options: SerializationOptions = SerializationOptions() - ) -> BodyRule: + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels): + for label in reversed(labels[1:]): result = {label.serialize(options): result} + + result.update( + { + START_LINE: self._meta.line, + END_LINE: self._meta.end_line, + } + ) + return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index c39f3ba2..11ac0f5e 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,7 +1,8 @@ +import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, IntLitRule, @@ -9,36 +10,135 @@ ) from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import ( - COLON_TOKEN, - EQ_TOKEN, - LBRACE_TOKEN, - COMMA_TOKEN, - RBRACE_TOKEN, + COLON, + EQ, + LBRACE, + COMMA, + RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, ) from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class TupleRule(InlineCommentMixIn): + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + Tuple[ + ExpressionRule, + 
Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ... + ], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "tuple" + + @property + def elements(self) -> List[ExpressionRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ExpressionRule) + ] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_tuples: + return [element.serialize(options, context) for element in self.elements] + + with context.modify(inside_dollar_string=True): + result = f"[{", ".join( + str(element.serialize(options, context)) for element in self.elements + )}]" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result class ObjectElemKeyRule(LarkRule): - _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] + + _children: Tuple[key_T] @staticmethod def lark_name() -> str: return "object_elem_key" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.children[0].serialize(options) + @property + def value(self) -> key_T: + return self._children[0] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + return self.value.serialize(options, context) + + +class ObjectElemKeyExpressionRule(LarkRule): + + _children: Tuple[ + LPAR, + ExpressionRule, + RPAR, + ] + + + @staticmethod + def lark_name() -> str: + return "object_elem_key_expression" + + @property + def expression(self) -> ExpressionRule: + return self._children[1] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + with context.modify(inside_dollar_string=True): + result = f"({self.expression.serialize(options, context)})" + if not 
context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ObjectElemKeyDotAccessor(LarkRule): + + _children: Tuple[ + IdentifierRule, + Tuple[ + IdentifierRule, + DOT, + ... + ] + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_dot_accessor" + + @property + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) class ObjectElemRule(LarkRule): _children: Tuple[ ObjectElemKeyRule, - Union[EQ_TOKEN, COLON_TOKEN], - Expression, + Union[EQ, COLON], + ExpressionRule, ] @staticmethod @@ -47,25 +147,31 @@ def lark_name() -> str: @property def key(self) -> ObjectElemKeyRule: - return self.children[0] + return self._children[0] @property def expression(self): - return self.children[2] + return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: return { - self.children[0].serialize(options): self.children[2].serialize(options) + self.key.serialize(options, context): self.expression.serialize(options, context) } class ObjectRule(InlineCommentMixIn): _children: Tuple[ - LBRACE_TOKEN, + LBRACE, Optional[NewLineOrCommentRule], - Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], - RBRACE_TOKEN, + Tuple[ + ObjectElemRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + ... 
+ ], + RBRACE, ] @staticmethod @@ -78,8 +184,21 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - result = {} - for element in self.elements: - result.update(element.serialize()) + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_objects: + result = {} + for element in self.elements: + result.update(element.serialize(options, context)) + + return result + + with context.modify(inside_dollar_string=True): + result = f"{{{", ".join( + f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + for element in self.elements + )}}}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expressions.py similarity index 53% rename from hcl2/rule_transformer/rules/expression.py rename to hcl2/rule_transformer/rules/expressions.py index 8a03f813..d89f3b3c 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -8,7 +8,7 @@ LarkToken, ) from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, @@ -18,46 +18,46 @@ to_dollar_string, unwrap_dollar_string, SerializationOptions, + SerializationContext, ) -class Expression(InlineCommentMixIn, ABC): - @property - def lark_name(self) -> str: +class ExpressionRule(InlineCommentMixIn, ABC): + @staticmethod + def lark_name() -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): 
super().__init__(children, meta) -class ExprTermRule(Expression): +class ExprTermRule(ExpressionRule): type_ = Tuple[ - Optional[LPAR_TOKEN], + Optional[LPAR], Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - Optional[RPAR_TOKEN], + Optional[RPAR], ] _children: type_ - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].lark_name == "LPAR" + and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].lark_name == "RPAR" + and children[-1].lark_name() == "RPAR" ): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @@ -66,35 +66,37 @@ def parentheses(self) -> bool: return self._parentheses @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: - result = self.expression.serialize(options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = self.expression.serialize(options, context) + if self.parentheses: result = wrap_into_parentheses(result) - result = to_dollar_string(result) - - if options.unwrap_dollar_string: - result = unwrap_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class ConditionalRule(Expression): +class ConditionalRule(ExpressionRule): _children: Tuple[ - Expression, - QMARK_TOKEN, + ExpressionRule, + QMARK, Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - COLON_TOKEN, + COLON, Optional[NewLineOrCommentRule], - Expression, + 
ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -102,25 +104,34 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @property - def condition(self) -> Expression: + def condition(self) -> ExpressionRule: return self._children[0] @property - def if_true(self) -> Expression: + def if_true(self) -> ExpressionRule: return self._children[3] @property - def if_false(self) -> Expression: + def if_false(self) -> ExpressionRule: return self._children[7] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - options = options.replace(unwrap_dollar_string=True) - print(self.condition) - result = f"{self.condition.serialize(options)} ? {self.if_true.serialize(options)} : {self.if_false.serialize(options)}" - return to_dollar_string(result) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=False): + result = ( + f"{self.condition.serialize(options, context)} " + f"? 
{self.if_true.serialize(options, context)} " + f": {self.if_false.serialize(options, context)}" + ) + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class BinaryTermRule(Expression): + +class BinaryTermRule(ExpressionRule): _children: Tuple[ BinaryOperatorRule, @@ -128,8 +139,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -144,19 +155,21 @@ def binary_operator(self) -> BinaryOperatorRule: def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" -class BinaryOpRule(Expression): +class BinaryOpRule(ExpressionRule): _children: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_op" @property @@ -167,24 +180,28 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - children_options = options.replace(unwrap_dollar_string=True) - lhs = self.expr_term.serialize(children_options) - operator = self.binary_term.binary_operator.serialize(children_options) - rhs = self.binary_term.expr_term.serialize(children_options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + lhs = self.expr_term.serialize(options, context) + operator 
= self.binary_term.binary_operator.serialize(options, context) + rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" - if options.unwrap_dollar_string: - return result - return to_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class UnaryOpRule(Expression): + +class UnaryOpRule(ExpressionRule): _children: Tuple[LarkToken, ExprTermRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "unary_op" @property @@ -195,5 +212,9 @@ def operator(self) -> str: def expr_term(self): return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string( + f"{self.operator}{self.expr_term.serialize(options, context)}" + ) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py new file mode 100644 index 00000000..412a1667 --- /dev/null +++ b/hcl2/rule_transformer/rules/functions.py @@ -0,0 +1,104 @@ +from functools import lru_cache +from typing import Any, Optional, Tuple, Union, List + +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class ArgumentsRule(InlineCommentMixIn): + + _children: Tuple[ + ExpressionRule, + Tuple[ + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ExpressionRule, + ... 
+ ], + Optional[Union[COMMA, ELLIPSIS]], + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "arguments" + + @property + @lru_cache(maxsize=None) + def has_ellipsis(self) -> bool: + for child in self._children[-2:]: + if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": + return True + return False + + @property + def arguments(self) -> List[ExpressionRule]: + return [child for child in self._children if isinstance(child, ExpressionRule)] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + if self.has_ellipsis: + result += " ..." + return result + + +class FunctionCallRule(InlineCommentMixIn): + + _children: Tuple[ + IdentifierRule, + Optional[IdentifierRule], + Optional[IdentifierRule], + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "function_call" + + @property + @lru_cache(maxsize=None) + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + @property + @lru_cache(maxsize=None) + def arguments(self) -> Optional[ArgumentsRule]: + for child in self._children[2:6]: + if isinstance(child, ArgumentsRule): + return child + + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ( + f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"({self.arguments.serialize(options, context) if self.arguments else ""})" + ) + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +# class ProviderFunctionCallRule(FunctionCallRule): +# _children: Tuple[ +# IdentifierRule, +# IdentifierRule, +# IdentifierRule, +# LPAR, +# Optional[NewLineOrCommentRule], +# 
Optional[ArgumentsRule], +# Optional[NewLineOrCommentRule], +# RPAR, +# ] +# +# @staticmethod +# def lark_name() -> str: +# return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index ce23d040..7a9b53a5 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -1,59 +1,69 @@ -from typing import List, Optional, Tuple, Any +from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import ( - DOT_TOKEN, - IntToken, - LSQB_TOKEN, - RSQB_TOKEN, + DOT, + IntLiteral, + LSQB, + RSQB, + ATTR_SPLAT, ) from hcl2.rule_transformer.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + to_dollar_string, + SerializationContext, +) class ShortIndexRule(LarkRule): _children: Tuple[ - DOT_TOKEN, - IntToken, + DOT, + IntLiteral, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "short_index" @property def index(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f".{self.index.serialize(options)}" -class SqbIndex(InlineCommentMixIn): +class SqbIndexRule(InlineCommentMixIn): _children: Tuple[ - LSQB_TOKEN, + LSQB, Optional[NewLineOrCommentRule], ExprTermRule, Optional[NewLineOrCommentRule], - RSQB_TOKEN, + RSQB, ] - @property - def lark_name(self) -> str: 
+ @staticmethod + def lark_name() -> str: return "braces_index" @property def index_expression(self): return self.children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -61,15 +71,170 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) -class IndexExprTermRule(Expression): +class IndexExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, SqbIndex] + _children: Tuple[ExprTermRule, SqbIndexRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "index_expr_term" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string( - f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class GetAttrRule(LarkRule): + + _children: Tuple[ + DOT, + IdentifierRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr" + + @property + def identifier(self) -> IdentifierRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f".{self.identifier.serialize(options, context)}" + + +class GetAttrExprTermRule(ExpressionRule): + + _children: Tuple[ + ExprTermRule, + GetAttrRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + 
@property + def get_attr(self) -> GetAttrRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class AttrSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "attr_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".*" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs + ) + + +class AttrSplatExprTermRule(ExpressionRule): + + _children: Tuple[ExprTermRule, AttrSplatRule] + + @staticmethod + def lark_name() -> str: + return "attr_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> AttrSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class FullSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "full_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return 
self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return "[*]" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs ) + + +class FullSplatExprTermRule(ExpressionRule): + _children: Tuple[ExprTermRule, FullSplatRule] + + @staticmethod + def lark_name() -> str: + return "full_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> FullSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index 06ca99ae..db7e8289 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): @@ -13,35 +13,43 @@ class TokenRule(LarkRule, ABC): def token(self) -> LarkToken: return self._children[0] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.token.serialize() +class KeywordRule(TokenRule): + @staticmethod + def lark_name() -> str: + return "keyword" + + class IdentifierRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "identifier" class 
IntLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "int_lit" class FloatLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "float_lit" class StringPartRule(TokenRule): - @property - def lark_name(self) -> str: - return "string" + @staticmethod + def lark_name() -> str: + return "string_part" class BinaryOperatorRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 0f53c55a..dc3b85b0 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -3,48 +3,56 @@ from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import StringPartRule from hcl2.rule_transformer.rules.tokens import ( - INTERP_START_TOKEN, - RBRACE_TOKEN, - DBLQUOTE_TOKEN, - STRING_CHARS_TOKEN, + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, ) -from hcl2.rule_transformer.utils import SerializationOptions class StringRule(LarkRule): - _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "string" @property def string_parts(self): return self.children[1:-1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return '"' + 
"".join(part.serialize() for part in self.string_parts) + '"' class InterpolationRule(LarkRule): _children: Tuple[ - INTERP_START_TOKEN, - Expression, - RBRACE_TOKEN, + INTERP_START, + ExpressionRule, + RBRACE, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "interpolation" @property def expression(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return "${" + self.expression.serialize(options) + "}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string(self.expression.serialize(options)) diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py deleted file mode 100644 index 66d780b3..00000000 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ /dev/null @@ -1,56 +0,0 @@ -# from abc import ABC -# from typing import Tuple, Any, List, Optional, Type -# -# from lark.tree import Meta -# -# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -# from hcl2.rule_transformer.utils import SerializationOptions -# -# -# class TokenSequenceRule(LarkRule, ABC): -# -# _children: Tuple[TokenSequence] -# -# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): -# children = [TokenSequence(children)] -# super().__init__(children, meta) -# -# def serialized_type(self) -> Type: -# return str -# -# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: -# return self.serialized_type()(self._children[0].serialize(options)) -# -# -# class IdentifierRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "identifier" -# -# -# class IntLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "int_lit" -# -# def serialized_type(self) -> Type: -# return int -# -# -# class FloatLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() 
-> str: -# return "float_lit" -# -# def serialized_type(self) -> Type: -# return float -# -# -# class StringLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; -# # nevertheless, try to change it to a rule in the grammar -# return "STRING_LIT" -# -# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 18e1ac07..7dd79f63 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,54 +1,67 @@ -from typing import Callable, Any +from functools import lru_cache +from typing import Callable, Any, Type from hcl2.rule_transformer.rules.abstract import LarkToken class StringToken(LarkToken): - def __init__(self, name: str, value: Any): + """ + Single run-time base class; every `StringToken["..."]` call returns a + cached subclass whose static `lark_name()` yields the given string. + """ + + @staticmethod + @lru_cache(maxsize=None) + def __build_subclass(name: str) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + return type( # type: ignore + f"{name}_TOKEN", + (StringToken,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + }, + ) + + def __class_getitem__(cls, name: str) -> Type["StringToken"]: + if not isinstance(name, str): + raise TypeError("StringToken[...] 
expects a single str argument") + return cls.__build_subclass(name) + + def __init__(self, value: Any) -> None: super().__init__(value) - self._name = name @property - def lark_name(self) -> str: - return self._name - - @property - def serialize_conversion(self) -> Callable: - return str - - -# explicitly define various kinds of string-based tokens -STRING_CHARS_TOKEN = StringToken -ESCAPED_INTERPOLATION_TOKEN = StringToken -BINARY_OP_TOKEN = StringToken -EQ_TOKEN = StringToken -COLON_TOKEN = StringToken -LPAR_TOKEN = StringToken # ( -RPAR_TOKEN = StringToken # ) -LBRACE_TOKEN = StringToken # { -RBRACE_TOKEN = StringToken # } -DOT_TOKEN = StringToken -COMMA_TOKEN = StringToken -QMARK_TOKEN = StringToken -LSQB_TOKEN = StringToken # [ -RSQB_TOKEN = StringToken # ] -INTERP_START_TOKEN = StringToken # ${ -DBLQUOTE_TOKEN = StringToken # " - - -class IdentifierToken(LarkToken): - @property - def lark_name(self) -> str: - return "IDENTIFIER" - - @property - def serialize_conversion(self) -> Callable: + def serialize_conversion(self) -> Callable[[Any], str]: return str -class IntToken(LarkToken): - @property - def lark_name(self) -> str: +# explicitly define various kinds of string-based tokens for type hinting +NAME = StringToken["NAME"] +STRING_CHARS = StringToken["STRING_CHARS"] +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] +BINARY_OP = StringToken["BINARY_OP"] +EQ = StringToken["EQ"] +COLON = StringToken["COLON"] +LPAR = StringToken["LPAR"] +RPAR = StringToken["RPAR"] +LBRACE = StringToken["LBRACE"] +RBRACE = StringToken["RBRACE"] +DOT = StringToken["DOT"] +COMMA = StringToken["COMMA"] +ELLIPSIS = StringToken["ELLIPSIS"] +QMARK = StringToken["QMARK"] +LSQB = StringToken["LSQB"] +RSQB = StringToken["RSQB"] +INTERP_START = StringToken["INTERP_START"] +DBLQUOTE = StringToken["DBLQUOTE"] +ATTR_SPLAT = StringToken["ATTR_SPLAT"] +FULL_SPLAT = StringToken["FULL_SPLAT"] + + +class IntLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return 
"INT_LITERAL" @property @@ -56,9 +69,9 @@ def serialize_conversion(self) -> Callable: return int -class FloatToken(LarkToken): - @property - def lark_name(self) -> str: +class FloatLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return "FLOAT_LITERAL" @property diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 65d5dd9c..fa24355c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -7,8 +7,8 @@ class NewLineOrCommentRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "new_line_or_comment" @classmethod diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 31e88d61..41e970d6 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -14,18 +14,28 @@ ObjectRule, ObjectElemRule, ObjectElemKeyRule, + TupleRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expression import ( +from hcl2.rule_transformer.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, - SqbIndex, + SqbIndexRule, ShortIndexRule, + GetAttrRule, + GetAttrExprTermRule, + AttrSplatExprTermRule, + AttrSplatRule, + FullSplatRule, + FullSplatExprTermRule, ) from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, @@ -36,10 +46,10 @@ ) from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( - IdentifierToken, + NAME, + IntLiteral, + FloatLiteral, StringToken, - IntToken, - FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -59,16 +69,16 @@ def __init__(self, discard_new_line_or_comments: 
bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: - return StringToken(token.type, token.value) + return StringToken[token.type](token.value) - def IDENTIFIER(self, token: Token) -> IdentifierToken: - return IdentifierToken(token.value) + def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: + return FloatLiteral(token.value) - def INT_LITERAL(self, token: Token) -> IntToken: - return IntToken(token.value) + def NAME(self, token: Token) -> NAME: + return NAME(token.value) - def FLOAT_LITERAL(self, token: Token) -> FloatToken: - return FloatToken(token.value) + def INT_LITERAL(self, token: Token) -> IntLiteral: + return IntLiteral(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -140,6 +150,10 @@ def unary_op(self, meta: Meta, args) -> UnaryOpRule: def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) + @v_args(meta=True) + def tuple(self, meta: Meta, args) -> TupleRule: + return TupleRule(args, meta) + @v_args(meta=True) def object(self, meta: Meta, args) -> ObjectRule: return ObjectRule(args, meta) @@ -152,14 +166,62 @@ def object_elem(self, meta: Meta, args) -> ObjectElemRule: def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: return ObjectElemKeyRule(args, meta) + @v_args(meta=True) + def object_elem_key_expression( + self, meta: Meta, args + ) -> ObjectElemKeyExpressionRule: + return ObjectElemKeyExpressionRule(args, meta) + + @v_args(meta=True) + def object_elem_key_dot_accessor( + self, meta: Meta, args + ) -> ObjectElemKeyDotAccessor: + return ObjectElemKeyDotAccessor(args, meta) + + @v_args(meta=True) + def arguments(self, meta: Meta, args) -> ArgumentsRule: + return ArgumentsRule(args, meta) + + @v_args(meta=True) + def function_call(self, meta: Meta, args) -> FunctionCallRule: + return FunctionCallRule(args, meta) + + # @v_args(meta=True) + # def provider_function_call(self, meta: Meta, 
args) -> ProviderFunctionCallRule: + # return ProviderFunctionCallRule(args, meta) + @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) @v_args(meta=True) - def braces_index(self, meta: Meta, args) -> SqbIndex: - return SqbIndex(args, meta) + def braces_index(self, meta: Meta, args) -> SqbIndexRule: + return SqbIndexRule(args, meta) @v_args(meta=True) def short_index(self, meta: Meta, args) -> ShortIndexRule: return ShortIndexRule(args, meta) + + @v_args(meta=True) + def get_attr(self, meta: Meta, args) -> GetAttrRule: + return GetAttrRule(args, meta) + + @v_args(meta=True) + def get_attr_expr_term(self, meta: Meta, args) -> GetAttrExprTermRule: + return GetAttrExprTermRule(args, meta) + + @v_args(meta=True) + def attr_splat(self, meta: Meta, args) -> AttrSplatRule: + return AttrSplatRule(args, meta) + + @v_args(meta=True) + def attr_splat_expr_term(self, meta: Meta, args) -> AttrSplatExprTermRule: + return AttrSplatExprTermRule(args, meta) + + @v_args(meta=True) + def full_splat(self, meta: Meta, args) -> FullSplatRule: + return FullSplatRule(args, meta) + + @v_args(meta=True) + def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: + return FullSplatExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 6a6ed661..8ffeab8b 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,15 +1,48 @@ +from contextlib import contextmanager from dataclasses import dataclass, replace +from typing import Generator @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - unwrap_dollar_string: bool = False - - def replace(self, **kwargs) -> "SerializationOptions": + wrap_objects: bool = False + wrap_tuples: bool = False + + +@dataclass +class DeserializationOptions: + pass + + +@dataclass +class SerializationContext: + inside_dollar_string: bool = False + + def replace(self, 
**kwargs) -> "SerializationContext": return replace(self, **kwargs) - + + @contextmanager + def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: + """Context manager that yields a modified copy of the context""" + modified_context = self.replace(**kwargs) + yield modified_context + + @contextmanager + def modify(self, **kwargs): + original_values = {key: getattr(self, key) for key in kwargs} + + for key, value in kwargs.items(): + setattr(self, key, value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + setattr(self, key, value) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From f0f6fc995624fc19878cfa86743aa899c7344b6c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 12 Aug 2025 14:48:52 +0200 Subject: [PATCH 07/24] add JSON -> LarkElement deserializer; batch of other changes --- hcl2/const.py | 1 + hcl2/rule_transformer/deserializer.py | 264 ++++++++++++++++++- hcl2/rule_transformer/rules/abstract.py | 3 +- hcl2/rule_transformer/rules/base.py | 23 +- hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/literal_rules.py | 6 - hcl2/rule_transformer/rules/strings.py | 57 ++-- hcl2/rule_transformer/rules/tokens.py | 72 +++-- hcl2/rule_transformer/transformer.py | 7 +- hcl2/rule_transformer/utils.py | 1 + 10 files changed, 365 insertions(+), 71 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1d46f35a..1bd4a4ce 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ START_LINE_KEY = "__start_line__" END_LINE_KEY = "__end_line__" +IS_BLOCK = "__is_block__" diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 5bdcf775..7b834968 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,7 +1,54 @@ import json +from functools import lru_cache from typing import Any, TextIO, List +from regex import regex + +from hcl2 import parses +from 
hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.rules.base import ( + BlockRule, + AttributeRule, + BodyRule, + StartRule, +) +from hcl2.rule_transformer.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyRule, +) +from hcl2.rule_transformer.rules.expressions import ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import ( + IdentifierRule, + IntLitRule, + FloatLitRule, +) +from hcl2.rule_transformer.rules.strings import ( + StringRule, + InterpolationRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.tokens import ( + NAME, + EQ, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + INTERP_START, + RBRACE, + IntLiteral, + FloatLiteral, + RSQB, + LSQB, + COMMA, + DOT, + LBRACE, +) +from hcl2.rule_transformer.transformer import RuleTransformer from hcl2.rule_transformer.utils import DeserializationOptions @@ -9,8 +56,13 @@ class Deserializer: def __init__(self, options=DeserializationOptions()): self.options = options + @property + @lru_cache + def _transformer(self) -> RuleTransformer: + return RuleTransformer() + def load_python(self, value: Any) -> LarkElement: - pass + return StartRule([self._deserialize(value)]) def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) @@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement: return self.loads(file.read()) def _deserialize(self, value: Any) -> LarkElement: - pass + if isinstance(value, dict): + if self._contains_block_marker(value): + elements = self._deserialize_block_elements(value) + return BodyRule(elements) + + return self._deserialize_object(value) + + if isinstance(value, list): + return self._deserialize_list(value) + + return self._deserialize_text(value) + + def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: + children = [] + + for key, value in 
value.items(): + if self._is_block(value): + # this value is a list of blocks, iterate over each block and deserialize them + for block in value: + children.append(self._deserialize_block(key, block)) + else: + + # otherwise it's just an attribute + if key != IS_BLOCK: + children.append(self._deserialize_attribute(key, value)) + + return children + + def _deserialize_text(self, value) -> LarkRule: + try: + int_val = int(value) + return IntLitRule([IntLiteral(int_val)]) + except ValueError: + pass + + try: + float_val = float(value) + return FloatLitRule([FloatLiteral(float_val)]) + except ValueError: + pass + + if isinstance(value, str): + if value.startswith('"') and value.endswith('"'): + return self._deserialize_string(value) + + if self._is_expression(value): + return self._deserialize_expression(value) + + return self._deserialize_identifier(value) + + elif isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + return self._deserialize_identifier(str(value)) + + def _deserialize_identifier(self, value: str) -> IdentifierRule: + return IdentifierRule([NAME(value)]) + + def _deserialize_string(self, value: str) -> StringRule: + result = [] + + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + + for part in parts: + if part == '"': + continue + + if part.startswith('"'): + part = part[1:] + if part.endswith('"'): + part = part[:-1] + + e = self._deserialize_string_part(part) + result.append(e) + + return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) + + def _deserialize_string_part(self, value: str) -> StringPartRule: + if value.startswith("$${") and value.endswith("}"): + return StringPartRule([ESCAPED_INTERPOLATION(value)]) + + if value.startswith("${") and value.endswith("}"): + return StringPartRule( + [ + InterpolationRule( + [INTERP_START(), self._deserialize_expression(value), RBRACE()] + ) + ] + ) + + return StringPartRule([STRING_CHARS(value)]) + + def _deserialize_expression(self, value: str) -> ExprTermRule: + """Deserialize an expression string into an ExprTermRule.""" + # instead of processing expression manually and trying to recognize what kind of expression it is, + # turn it into HCL2 code and parse it with lark: + + # unwrap from ${ and } + value = value[2:-1] + # create HCL2 snippet + value = f"temp = {value}" + # parse the above + parsed_tree = parses(value) + # transform parsed tree into LarkElement tree + rules_tree = self._transformer.transform(parsed_tree) + # extract expression from the tree + return rules_tree.body.children[0].expression + + def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: + """Deserialize a block by extracting labels and body""" + labels = [first_label] + body = value + + # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) + while isinstance(body, dict) and not body.get(IS_BLOCK): + non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + if len(non_block_keys) == 1: + # This is another label level + label = non_block_keys[0] + labels.append(label) + body = body[label] + else: + # 
Multiple keys = this is the body + break + + return BlockRule( + [*[self._deserialize(label) for label in labels], self._deserialize(body)] + ) + + def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + children = [ + self._deserialize_identifier(name), + EQ(), + ExprTermRule([self._deserialize(value)]), + ] + return AttributeRule(children) + + def _deserialize_list(self, value: List) -> TupleRule: + children = [] + for element in value: + deserialized = self._deserialize(element) + if not isinstance(deserialized, ExprTermRule): + # whatever an element of the list is, it has to be nested inside ExprTermRule + deserialized = ExprTermRule([deserialized]) + children.append(deserialized) + children.append(COMMA()) + + return TupleRule([LSQB(), *children, RSQB()]) + + def _deserialize_object(self, value: dict) -> ObjectRule: + children = [] + for key, value in value.items(): + children.append(self._deserialize_object_elem(key, value)) + return ObjectRule([LBRACE(), *children, RBRACE()]) + + def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + if self._is_expression(key): + key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + elif "." 
in key: + parts = key.split(".") + children = [] + for part in parts: + children.append(self._deserialize_identifier(part)) + children.append(DOT()) + key = ObjectElemKeyDotAccessor(children[:-1]) # without the last comma + else: + key = self._deserialize_text(key) + + return ObjectElemRule( + [ + ObjectElemKeyRule([key]), + EQ(), + ExprTermRule([self._deserialize_text(value)]), + ] + ) + + def _is_expression(self, value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + def _is_block(self, value: Any) -> bool: + """Simple check: if it's a list containing dicts with IS_BLOCK markers""" + if not isinstance(value, list) or len(value) == 0: + return False - def _deserialize_dict(self, value: dict) -> LarkRule: - pass + # Check if any item in the list has IS_BLOCK marker (directly or nested) + for item in value: + if isinstance(item, dict) and self._contains_block_marker(item): + return True - def _deserialize_list(self, value: List) -> LarkRule: - pass + return False - def _deserialize_expression(self, value: str) -> LarkRule: - pass + def _contains_block_marker(self, obj: dict) -> bool: + """Recursively check if a dict contains IS_BLOCK marker anywhere""" + if obj.get(IS_BLOCK): + return True + for value in obj.values(): + if isinstance(value, dict) and self._contains_block_marker(value): + return True + if isinstance(value, list): + for element in value: + if self._contains_block_marker(element): + return True + return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index e32d9ddb..33dcc9ca 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -36,7 +36,7 @@ def serialize( class LarkToken(LarkElement, ABC): - def __init__(self, value: Union[str, int]): + def __init__(self, value: Union[str, int, float]): self._value = value super().__init__() @@ -100,7 +100,6 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for 
index, child in enumerate(children): if child is not None: - print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index da74954b..5c8468d4 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,11 @@ from lark.tree import Meta -from hcl2.dict_transformer import START_LINE, END_LINE +from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -42,7 +44,7 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - # AttributeRule, + AttributeRule, "BlockRule", ] ] @@ -58,6 +60,7 @@ def serialize( attributes: List[AttributeRule] = [] comments = [] inline_comments = [] + for child in self._children: if isinstance(child, BlockRule): @@ -116,7 +119,11 @@ def serialize( class BlockRule(LarkRule): - _children: Tuple[BodyRule] + _children: Tuple[ + IdentifierRule, + Optional[Union[IdentifierRule, StringRule]], + BodyRule, + ] def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @@ -141,15 +148,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: result = self._body.serialize(options) + if options.explicit_blocks: + result.update({IS_BLOCK: True}) + labels = self._labels for label in reversed(labels[1:]): result = {label.serialize(options): result} - result.update( - { - START_LINE: self._meta.line, - END_LINE: self._meta.end_line, - } - ) - return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 412a1667..54958514 
100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -78,7 +78,7 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: result = ( - f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" f"({self.arguments.serialize(options, context) if self.arguments else ""})" ) if not context.inside_dollar_string: diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index db7e8289..baf8546f 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -43,12 +43,6 @@ def lark_name() -> str: return "float_lit" -class StringPartRule(TokenRule): - @staticmethod - def lark_name() -> str: - return "string_part" - - class BinaryOperatorRule(TokenRule): @staticmethod def lark_name() -> str: diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index dc3b85b0..769ad5b9 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,15 +1,13 @@ -from typing import Tuple, Optional, List, Any, Union - -from lark.tree import Meta +from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, STRING_CHARS, + ESCAPED_INTERPOLATION, ) from hcl2.rule_transformer.utils import ( SerializationOptions, @@ -18,41 +16,58 @@ ) -class StringRule(LarkRule): +class InterpolationRule(LarkRule): - _children: Tuple[DBLQUOTE, 
List[StringPartRule], DBLQUOTE] + _children: Tuple[ + INTERP_START, + ExpressionRule, + RBRACE, + ] @staticmethod def lark_name() -> str: - return "string" + return "interpolation" @property - def string_parts(self): - return self.children[1:-1] + def expression(self): + return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return to_dollar_string(self.expression.serialize(options)) -class InterpolationRule(LarkRule): +class StringPartRule(LarkRule): + _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] - _children: Tuple[ - INTERP_START, - ExpressionRule, - RBRACE, - ] + @staticmethod + def lark_name() -> str: + return "string_part" + + @property + def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + return self._children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.content.serialize(options, context) + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: - return "interpolation" + return "string" @property - def expression(self): - return self.children[1] + def string_parts(self): + return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string(self.expression.serialize(options)) + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 7dd79f63..59e524f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import Callable, Any, Type +from typing import Callable, Any, Type, Optional, Tuple from 
hcl2.rule_transformer.rules.abstract import LarkToken @@ -10,9 +10,9 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ - @staticmethod + @classmethod @lru_cache(maxsize=None) - def __build_subclass(name: str) -> Type["StringToken"]: + def __build_subclass(cls, name: str) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" return type( # type: ignore f"{name}_TOKEN", @@ -28,7 +28,7 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Any) -> None: + def __init__(self, value: Optional[Any] = None): super().__init__(value) @property @@ -36,27 +36,59 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str +class StaticStringToken(LarkToken): + @classmethod + @lru_cache(maxsize=None) + def __build_subclass( + cls, name: str, default_value: str = None + ) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + + return type( # type: ignore + f"{name}_TOKEN", + (cls,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + "_default_value": default_value, + }, + ) + + def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: + name, default_value = value + return cls.__build_subclass(name, default_value) + + def __init__(self): + super().__init__(getattr(self, "_default_value")) + + @property + def serialize_conversion(self) -> Callable[[Any], str]: + return str + + # explicitly define various kinds of string-based tokens for type hinting +# variable value NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -EQ = StringToken["EQ"] -COLON = StringToken["COLON"] -LPAR = StringToken["LPAR"] -RPAR = StringToken["RPAR"] -LBRACE = StringToken["LBRACE"] -RBRACE = 
StringToken["RBRACE"] -DOT = StringToken["DOT"] -COMMA = StringToken["COMMA"] -ELLIPSIS = StringToken["ELLIPSIS"] -QMARK = StringToken["QMARK"] -LSQB = StringToken["LSQB"] -RSQB = StringToken["RSQB"] -INTERP_START = StringToken["INTERP_START"] -DBLQUOTE = StringToken["DBLQUOTE"] -ATTR_SPLAT = StringToken["ATTR_SPLAT"] -FULL_SPLAT = StringToken["FULL_SPLAT"] +# static value +EQ = StaticStringToken[("EQ", "=")] +COLON = StaticStringToken[("COLON", ":")] +LPAR = StaticStringToken[("LPAR", "(")] +RPAR = StaticStringToken[("RPAR", ")")] +LBRACE = StaticStringToken[("LBRACE", "{")] +RBRACE = StaticStringToken[("RBRACE", "}")] +DOT = StaticStringToken[("DOT", ".")] +COMMA = StaticStringToken[("COMMA", ",")] +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] +QMARK = StaticStringToken[("QMARK", "?")] +LSQB = StaticStringToken[("LSQB", "[")] +RSQB = StaticStringToken[("RSQB", "]")] +INTERP_START = StaticStringToken[("INTERP_START", "${")] +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 41e970d6..a7d91605 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,6 +1,4 @@ # pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -42,9 +40,12 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.strings import ( + InterpolationRule, + StringRule, StringPartRule, ) -from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( NAME, IntLiteral, diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8ffeab8b..404bdcdd 100644 --- 
a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -9,6 +9,7 @@ class SerializationOptions: with_meta: bool = False wrap_objects: bool = False wrap_tuples: bool = False + explicit_blocks: bool = True @dataclass From d8ac92d8f41de654218280aeb26f2cf4a45879f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 27 Aug 2025 11:35:56 +0200 Subject: [PATCH 08/24] add heredoc rules and deserialization; require heredoc openers to be on their on separate line in lark grammar; whitespace trimming based on current implementation in dict_transformer.py; --- hcl2/rule_transformer/deserializer.py | 32 ++++++++--- hcl2/rule_transformer/hcl2.lark | 4 +- hcl2/rule_transformer/rules/strings.py | 73 ++++++++++++++++++++++++++ hcl2/rule_transformer/rules/tokens.py | 6 ++- hcl2/rule_transformer/transformer.py | 12 ++++- hcl2/rule_transformer/utils.py | 8 ++- 6 files changed, 123 insertions(+), 12 deletions(-) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 7b834968..a17a9510 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,6 @@ import json from functools import lru_cache -from typing import Any, TextIO, List +from typing import Any, TextIO, List, Union from regex import regex @@ -31,6 +31,8 @@ StringRule, InterpolationRule, StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -47,9 +49,11 @@ COMMA, DOT, LBRACE, + HEREDOC_TRIM_TEMPLATE, + HEREDOC_TEMPLATE, ) from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions +from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN class Deserializer: @@ -99,7 +103,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: return children - def _deserialize_text(self, value) -> LarkRule: + def _deserialize_text(self, 
value: Any) -> LarkRule: try: int_val = int(value) return IntLitRule([IntLiteral(int_val)]) @@ -114,6 +118,16 @@ def _deserialize_text(self, value) -> LarkRule: if isinstance(value, str): if value.startswith('"') and value.endswith('"'): + if not self.options.heredocs_to_strings and value.startswith('"<<-'): + match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], True) + + if not self.options.heredocs_to_strings and value.startswith('"<<'): + match = HEREDOC_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], False) + return self._deserialize_string(value) if self._is_expression(value): @@ -131,11 +145,12 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule: def _deserialize_string(self, value: str) -> StringRule: result = [] - - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(value) if part != ""] + # split string into individual parts based on lark grammar # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + for part in parts: if part == '"': @@ -166,6 +181,11 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) + def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + if trim: + return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 3f8d913e..24140ada 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -67,8 +67,8 @@ ELLIPSIS : "..." 
COLONS: "::" // Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ // Ignore whitespace (but not newlines, as they're significant in HCL) %ignore /[ \t]+/ diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 769ad5b9..4e28e976 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,3 +1,4 @@ +import sys from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule @@ -8,11 +9,15 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, ) from hcl2.rule_transformer.utils import ( SerializationOptions, SerializationContext, to_dollar_string, + HEREDOC_TRIM_PATTERN, + HEREDOC_PATTERN, ) @@ -71,3 +76,71 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class HeredocTemplateRule(LarkRule): + + _children: Tuple[HEREDOC_TEMPLATE] + _trim_chars = "\n\t " + + + @staticmethod + def lark_name() -> str: + return "heredoc_template" + + @property + def heredoc(self): + return self.children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + result = heredoc.rstrip(self._trim_chars) + return f'"{result}"' + + +class HeredocTrimTemplateRule(HeredocTemplateRule): + + _children: 
Tuple[HEREDOC_TRIM_TEMPLATE] + + @staticmethod + def lark_name() -> str: + return "heredoc_trim_template" + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions + # This is a special version of heredocs that are declared with "<<-" + # This will calculate the minimum number of leading spaces in each line of a heredoc + # and then remove that number of spaces from each line + + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_TRIM_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + heredoc = heredoc.rstrip(self._trim_chars) + lines = heredoc.split("\n") + + # calculate the min number of leading spaces in each line + min_spaces = sys.maxsize + for line in lines: + leading_spaces = len(line) - len(line.lstrip(" ")) + min_spaces = min(min_spaces, leading_spaces) + + # trim off that number of leading spaces from each line + lines = [line[min_spaces:] for line in lines] + return '"' + "\n".join(lines) + '"' + \ No newline at end of file diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 59e524f3..5b1959f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -67,12 +67,14 @@ def serialize_conversion(self) -> Callable[[Any], str]: # explicitly define various kinds of string-based tokens for type hinting -# variable value +# variable values NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -# static value +HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +# static values EQ = StaticStringToken[("EQ", "=")] COLON = 
StaticStringToken[("COLON", ":")] LPAR = StaticStringToken[("LPAR", "(")] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index a7d91605..37ae445c 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -44,7 +44,9 @@ from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -127,6 +129,14 @@ def string_part(self, meta: Meta, args) -> StringPartRule: def interpolation(self, meta: Meta, args) -> InterpolationRule: return InterpolationRule(args, meta) + @v_args(meta=True) + def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: + return HeredocTemplateRule(args, meta) + + @v_args(meta=True) + def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: + return HeredocTrimTemplateRule(args, meta) + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 404bdcdd..98370ca3 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,7 +1,12 @@ +import re from contextlib import contextmanager from dataclasses import dataclass, replace from typing import Generator +HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) +HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + + @dataclass class SerializationOptions: @@ -10,11 +15,12 @@ class SerializationOptions: wrap_objects: bool = False wrap_tuples: bool = False explicit_blocks: bool = True + preserve_heredocs: bool = True @dataclass class DeserializationOptions: - pass + heredocs_to_strings: bool = False @dataclass From 5932662bfe5045c2e944f7c9e3fc55c94077c4c9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 15 Sep 2025 
12:26:59 +0200 Subject: [PATCH 09/24] add `for` expressions rules --- .../rule_transformer/rules/for_expressions.py | 283 ++++++++++++++++++ hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 4 + hcl2/rule_transformer/transformer.py | 36 ++- 4 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 hcl2/rule_transformer/rules/for_expressions.py diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py new file mode 100644 index 00000000..18abe6c8 --- /dev/null +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -0,0 +1,283 @@ +from typing import Any, Tuple, Optional, List + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ForIntroRule(InlineCommentMixIn): + """Rule for the intro part of for expressions: 'for key, value in collection :'""" + + _children: Tuple[ + FOR, + Optional[NewLineOrCommentRule], + IdentifierRule, + Optional[COMMA], + Optional[IdentifierRule], + Optional[NewLineOrCommentRule], + IN, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "for_intro" + + def __init__(self, children, meta: Optional[Meta] = None): + # Insert null comments at positions where they might be missing + self._possibly_insert_null_second_identifier(children) + 
self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + super().__init__(children, meta) + + def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): + second_identifier_present = ( + len([child for child in children if isinstance(child, IdentifierRule)]) == 2 + ) + if not second_identifier_present: + children.insert(3, None) + children.insert(4, None) + + @property + def first_iterator(self) -> IdentifierRule: + """Returns the first iterator""" + return self._children[2] + + @property + def second_iterator(self) -> Optional[IdentifierRule]: + """Returns the second iterator or None if not present""" + return self._children[4] + + @property + def iterable(self) -> ExpressionRule: + """Returns the collection expression being iterated over""" + return self._children[8] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + result = "for " + + result += f"{self.first_iterator.serialize(options, context)}" + if self.second_iterator: + result += f", {self.second_iterator.serialize(options, context)}" + + result += f" in {self.iterable.serialize(options, context)} : " + + return result + + +class ForCondRule(InlineCommentMixIn): + """Rule for the optional condition in for expressions: 'if condition'""" + + _children: Tuple[ + IF, + Optional[NewLineOrCommentRule], + ExpressionRule, # condition expression + ] + + @staticmethod + def lark_name() -> str: + return "for_cond" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1]) + super().__init__(children, meta) + + @property + def condition_expr(self) -> ExpressionRule: + """Returns the condition expression""" + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + return f"if {self.condition_expr.serialize(options, context)}" + + +class ForTupleExprRule(ExpressionRule): + """Rule for tuple/array for expressions: 
[for item in items : expression]""" + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "for_tuple_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 5, 7]) + self._possibly_insert_null_condition(children) + super().__init__(children, meta) + + def _possibly_insert_null_condition(self, children: List[LarkElement]): + if not len([child for child in children if isinstance(child, ForCondRule)]): + children.insert(6, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[4] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[6] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "[" + + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += self.value_expr.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "]" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ForObjectExprRule(ExpressionRule): + """Rule for object for expressions: {for key, value in items : key => value}""" + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + FOR_OBJECT_ARROW, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ELLIPSIS], + 
Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "for_object_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) + self._possibly_insert_null_optionals(children) + super().__init__(children, meta) + + def _possibly_insert_null_optionals(self, children: List[LarkElement]): + has_ellipsis = False + has_condition = False + + for child in children: + # if not has_ellipsis and isinstance(child, ELLIPSIS): + if ( + has_ellipsis is False + and child is not None + and child.lark_name() == ELLIPSIS.lark_name() + ): + has_ellipsis = True + if not has_condition and isinstance(child, ForCondRule): + has_condition = True + + if not has_ellipsis: + children.insert(9, None) + + if not has_condition: + children.insert(11, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def key_expr(self) -> ExpressionRule: + """Returns the key expression""" + return self._children[4] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[7] + + @property + def ellipsis(self) -> Optional[ELLIPSIS]: + """Returns the optional ellipsis token""" + return self._children[9] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[11] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = "{" + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += f"{self.key_expr.serialize(options, context)} => " + + result += self.value_expr.serialize( + SerializationOptions(wrap_objects=True), context + ) + + if self.ellipsis is not None: + result += self.ellipsis.serialize(options, 
context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 54958514..b25fed62 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -40,7 +40,7 @@ def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) if self.has_ellipsis: result += " ..." return result diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 5b1959f3..67d53fcf 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -91,6 +91,10 @@ def serialize_conversion(self) -> Callable[[Any], str]: DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] +FOR = StaticStringToken[("FOR", "for")] +IN = StaticStringToken[("IN", "in")] +IF = StaticStringToken[("IF", "if")] +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 37ae445c..1ab1dfda 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -23,6 +23,12 @@ ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, + ForIntroRule, + ForCondRule, +) from 
hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, @@ -40,12 +46,13 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, + KeywordRule, ) from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, - HeredocTemplateRule, + StringPartRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -72,6 +79,7 @@ def __init__(self, discard_new_line_or_comments: bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: + # TODO make this return StaticStringToken where applicable return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -109,6 +117,10 @@ def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) + @v_args(meta=True) + def keyword(self, meta: Meta, args) -> KeywordRule: + return KeywordRule(args, meta) + @v_args(meta=True) def int_lit(self, meta: Meta, args) -> IntLitRule: return IntLitRule(args, meta) @@ -132,11 +144,11 @@ def interpolation(self, meta: Meta, args) -> InterpolationRule: @v_args(meta=True) def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: return HeredocTemplateRule(args, meta) - + @v_args(meta=True) def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: return HeredocTrimTemplateRule(args, meta) - + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) @@ -236,3 +248,19 @@ def full_splat(self, meta: Meta, args) -> FullSplatRule: @v_args(meta=True) def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: return FullSplatExprTermRule(args, meta) + + @v_args(meta=True) + def for_tuple_expr(self, meta: Meta, args) -> 
ForTupleExprRule: + return ForTupleExprRule(args, meta) + + @v_args(meta=True) + def for_object_expr(self, meta: Meta, args) -> ForObjectExprRule: + return ForObjectExprRule(args, meta) + + @v_args(meta=True) + def for_intro(self, meta: Meta, args) -> ForIntroRule: + return ForIntroRule(args, meta) + + @v_args(meta=True) + def for_cond(self, meta: Meta, args) -> ForCondRule: + return ForCondRule(args, meta) From 107fcb223f176793e04aa750f2c120cb38d00afa Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 29 Sep 2025 13:10:35 +0200 Subject: [PATCH 10/24] add Lark AST -> HCL2 reconstructor and LarkTree formatter; various other fixes and changes: * preserve order of serialized attributes and blocks * make RuleTransformer.__default_token__ differentiate between StringToken and StaticStringToken * add separate ProviderFunctionCallRule class for more accurate reconstruction --- hcl2/rule_transformer/deserializer.py | 120 +++++--- hcl2/rule_transformer/formatter.py | 262 ++++++++++++++++++ hcl2/rule_transformer/reconstructor.py | 204 ++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 5 +- hcl2/rule_transformer/rules/base.py | 48 ++-- hcl2/rule_transformer/rules/containers.py | 69 +++-- hcl2/rule_transformer/rules/expressions.py | 9 +- .../rule_transformer/rules/for_expressions.py | 92 +++--- hcl2/rule_transformer/rules/functions.py | 63 +++-- hcl2/rule_transformer/rules/indexing.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 12 +- hcl2/rule_transformer/rules/whitespace.py | 9 +- hcl2/rule_transformer/transformer.py | 3 + hcl2/rule_transformer/utils.py | 6 - 14 files changed, 738 insertions(+), 166 deletions(-) create mode 100644 hcl2/rule_transformer/formatter.py create mode 100644 hcl2/rule_transformer/reconstructor.py diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index a17a9510..56e1ad44 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,8 @@ import json +from 
abc import ABC, abstractmethod +from dataclasses import dataclass from functools import lru_cache -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Union, Optional from regex import regex @@ -31,7 +33,7 @@ StringRule, InterpolationRule, StringPartRule, - HeredocTemplateRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -51,14 +53,38 @@ LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, + COLON, ) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN -class Deserializer: - def __init__(self, options=DeserializationOptions()): - self.options = options +@dataclass +class DeserializerOptions: + heredocs_to_strings: bool = False + indent_length: int = 2 + object_elements_colon: bool = False + object_elements_trailing_comma: bool = True + + +class LarkElementTreeDeserializer(ABC): + def __init__(self, options: DeserializerOptions = None): + self.options = options or DeserializerOptions() + + @abstractmethod + def loads(self, value: str) -> LarkElement: + raise NotImplementedError() + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + +class BaseDeserializer(LarkElementTreeDeserializer): + def __init__(self, options=None): + super().__init__(options) + self._current_line = 1 + self._last_new_line: Optional[NewLineOrCommentRule] = None @property @lru_cache @@ -66,19 +92,23 @@ def _transformer(self) -> RuleTransformer: return RuleTransformer() def load_python(self, value: Any) -> LarkElement: - return StartRule([self._deserialize(value)]) + result = StartRule([self._deserialize(value)]) + return result def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) - def 
load(self, file: TextIO) -> LarkElement: - return self.loads(file.read()) - def _deserialize(self, value: Any) -> LarkElement: if isinstance(value, dict): if self._contains_block_marker(value): - elements = self._deserialize_block_elements(value) - return BodyRule(elements) + + children = [] + + block_elements = self._deserialize_block_elements(value) + for element in block_elements: + children.append(element) + + return BodyRule(children) return self._deserialize_object(value) @@ -89,14 +119,13 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): if self._is_block(value): # this value is a list of blocks, iterate over each block and deserialize them for block in value: children.append(self._deserialize_block(key, block)) - else: + else: # otherwise it's just an attribute if key != IS_BLOCK: children.append(self._deserialize_attribute(key, value)) @@ -106,28 +135,24 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: def _deserialize_text(self, value: Any) -> LarkRule: try: int_val = int(value) + if "." 
in str(value): + return FloatLitRule([FloatLiteral(float(value))]) return IntLitRule([IntLiteral(int_val)]) except ValueError: pass - try: - float_val = float(value) - return FloatLitRule([FloatLiteral(float_val)]) - except ValueError: - pass - if isinstance(value, str): if value.startswith('"') and value.endswith('"'): if not self.options.heredocs_to_strings and value.startswith('"<<-'): match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], True) - + if not self.options.heredocs_to_strings and value.startswith('"<<'): match = HEREDOC_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], False) - + return self._deserialize_string(value) if self._is_expression(value): @@ -151,7 +176,6 @@ def _deserialize_string(self, value: str) -> StringRule: pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") parts = [part for part in pattern.split(value) if part != ""] - for part in parts: if part == '"': continue @@ -181,10 +205,12 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) - def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + def _deserialize_heredoc( + self, value: str, trim: bool + ) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: if trim: return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) - return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" @@ -200,7 +226,9 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree - return rules_tree.body.children[0].expression + result = rules_tree.body.children[0].expression + + return 
result def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: """Deserialize a block by extracting labels and body""" @@ -220,14 +248,24 @@ def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: break return BlockRule( - [*[self._deserialize(label) for label in labels], self._deserialize(body)] + [ + *[self._deserialize(label) for label in labels], + LBRACE(), + self._deserialize(body), + RBRACE(), + ] ) def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + expr_term = self._deserialize(value) + + if not isinstance(expr_term, ExprTermRule): + expr_term = ExprTermRule([expr_term]) + children = [ self._deserialize_identifier(name), EQ(), - ExprTermRule([self._deserialize(value)]), + expr_term, ] return AttributeRule(children) @@ -247,11 +285,21 @@ def _deserialize_object(self, value: dict) -> ObjectRule: children = [] for key, value in value.items(): children.append(self._deserialize_object_elem(key, value)) + + if self.options.object_elements_trailing_comma: + children.append(COMMA()) + return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + key = ObjectElemKeyExpressionRule( + [ + child + for child in self._deserialize_expression(key).children + if child is not None + ] + ) elif "." 
in key: parts = key.split(".") children = [] @@ -262,13 +310,13 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: else: key = self._deserialize_text(key) - return ObjectElemRule( - [ - ObjectElemKeyRule([key]), - EQ(), - ExprTermRule([self._deserialize_text(value)]), - ] - ) + result = [ + ObjectElemKeyRule([key]), + COLON() if self.options.object_elements_colon else EQ(), + ExprTermRule([self._deserialize(value)]), + ] + + return ObjectElemRule(result) def _is_expression(self, value: str) -> bool: return value.startswith("${") and value.endswith("}") diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/rule_transformer/formatter.py new file mode 100644 index 00000000..ad0247dc --- /dev/null +++ b/hcl2/rule_transformer/formatter.py @@ -0,0 +1,262 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List + +from hcl2.rule_transformer.rules.abstract import LarkElement +from hcl2.rule_transformer.rules.base import ( + StartRule, + BlockRule, + AttributeRule, + BodyRule, +) +from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +@dataclass +class FormatterOptions: + indent_length: int = 2 + open_empty_blocks: bool = True + open_empty_objects: bool = True + open_empty_tuples: bool = False + + vertically_align_attributes: bool = True + vertically_align_object_elements: bool = True + + +class LarkElementTreeFormatter(ABC): + def __init__(self, options: FormatterOptions = None): + self.options = options or FormatterOptions() + + @abstractmethod + def format_tree(self, tree: LarkElement): + raise NotImplementedError() + + 
+class BaseFormatter(LarkElementTreeFormatter): + def __init__(self, options: FormatterOptions = None): + super().__init__(options) + self._current_line = 1 + self._current_indent_level = 0 + + def format_tree(self, tree: LarkElement): + if isinstance(tree, StartRule): + self.format_start_rule(tree) + + def format_start_rule(self, rule: StartRule): + self.format_body_rule(rule.body, 0) + # for child in rule.body.children: + # if isinstance(child, BlockRule): + # self.format_block_rule(child, 1) + + def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + if self.options.vertically_align_attributes: + self._vertically_align_attributes_in_body(rule.body) + + self.format_body_rule(rule.body, indent_level) + if len(rule.body.children) > 0: + rule.children.insert(-1, self._build_newline(indent_level - 1)) + elif self.options.open_empty_blocks: + rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) + + def format_body_rule(self, rule: BodyRule, indent_level: int = 0): + + in_start = isinstance(rule.parent, StartRule) + + new_children = [] + if not in_start: + new_children.append(self._build_newline(indent_level)) + + for i, child in enumerate(rule.children): + new_children.append(child) + + if isinstance(child, AttributeRule): + self.format_attribute_rule(child, indent_level) + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, BlockRule): + self.format_block_rule(child, indent_level + 1) + + if i > 0: + new_children.insert(-2, self._build_newline(indent_level)) + new_children.append(self._build_newline(indent_level, 2)) + + new_children.pop(-1) + rule._children = new_children + + def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + self.format_expression(rule.expression, indent_level + 1) + + def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_tuples: + rule.children.insert(1, self._build_newline(indent_level - 1, 
2)) + return + + new_children = [] + for child in rule.children: + new_children.append(child) + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + if isinstance(child, (COMMA, LSQB)): + new_children.append(self._build_newline(indent_level)) + + self._deindent_last_line() + rule._children = new_children + + def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_objects: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for i in range(len(rule.children)): + child = rule.children[i] + next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None + new_children.append(child) + + if isinstance(child, LBRACE): + new_children.append(self._build_newline(indent_level)) + + if ( + next_child + and isinstance(next_child, ObjectElemRule) + and isinstance(child, (ObjectElemRule, COMMA)) + ): + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, ObjectElemRule): + self.format_expression(child.expression, indent_level + 1) + + new_children.insert(-1, self._build_newline(indent_level)) + self._deindent_last_line() + + rule._children = new_children + + if self.options.vertically_align_object_elements: + self._vertically_align_object_elems(rule) + + def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + if isinstance(rule.expression, ObjectRule): + self.format_object_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, TupleRule): + self.format_tuple_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, ForTupleExprRule): + self.format_fortupleexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ForObjectExprRule): + self.format_forobjectexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ExprTermRule): + self.format_expression(rule.expression) + + def format_fortupleexpr(self, 
expression: ForTupleExprRule, indent_level: int = 0): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 5, 7] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + self._deindent_last_line() + # expression.children[8] = self._build_newline(indent_level - 1) + + def format_forobjectexpr( + self, expression: ForObjectExprRule, indent_level: int = 0 + ): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 12] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + + self._deindent_last_line() + + def _vertically_align_attributes_in_body(self, body: BodyRule): + attributes_sequence: List[AttributeRule] = [] + + for child in body.children: + if isinstance(child, AttributeRule): + attributes_sequence.append(child) + + elif attributes_sequence: + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + attributes_sequence = [] + + def _vertically_align_object_elems(self, rule: ObjectRule): + max_length = max(len(elem.key.serialize()) for elem in rule.elements) + for elem in rule.elements: + key_length = len(elem.key.serialize()) + print(elem.key.serialize(), key_length) + + spaces_to_add = max_length - key_length + + separator = elem.children[1] + if isinstance(separator, COLON): + spaces_to_add += 1 + + elem.children[1].set_value(" " * spaces_to_add + separator.value) + + def _move_to_next_line(self, times: int = 1): + self._current_line += times + + def _increase_indent_level(self, times: int = 1): + self._current_indent_level += times + + def 
_decrease_indent_level(self, times: int = 1): + self._current_indent_level -= times + if self._current_indent_level < 0: + self._current_indent_level = 0 + + def _build_newline( + self, next_line_indent: int = 0, count: int = 1 + ) -> NewLineOrCommentRule: + result = NewLineOrCommentRule( + [ + NL_OR_COMMENT( + ("\n" * count) + " " * self.options.indent_length * next_line_indent + ) + ] + ) + self._last_new_line = result + return result + + def _deindent_last_line(self, times: int = 1): + token = self._last_new_line.token + for i in range(times): + if token.value.endswith(" " * self.options.indent_length): + token.set_value(token.value[: -self.options.indent_length]) + + # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: + # result = Meta() + # result.empty = length == 0 + # result.line = self._current_line + # result.column = indent_level * self.options.indent_length + # # result.start_pos = + # # result.end_line = + # # result.end_column = + # # result.end_pos = + # # result.orig_expansion = + # # result.match_tree = + # return result diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py new file mode 100644 index 00000000..7d316b2c --- /dev/null +++ b/hcl2/rule_transformer/reconstructor.py @@ -0,0 +1,204 @@ +from typing import List, Union + +from lark import Tree, Token +from hcl2.rule_transformer.rules import tokens +from hcl2.rule_transformer.rules.base import BlockRule +from hcl2.rule_transformer.rules.for_expressions import ForIntroRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule + + +class HCLReconstructor: + """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + 
self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: + return False + + # Don't add space at the beginning + if self._last_token_name is None: + return False + + if isinstance(current_node, Token): + token_type = current_node.type + + # Space before '{' in blocks + if ( + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() + ): + return True + + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + + return True + + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... 
except for last comma + if token_type == tokens.RSQB.lark_name(): + return False + return True + + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), + ]: + return True + + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True + + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True + + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True + + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + return True + + # space around binary operators + if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data + + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True + + return False + + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for child in tree.children: + 
result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] + else: + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 33dcc9ca..e83fed2b 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -49,6 +49,9 @@ def serialize_conversion(self) -> Callable: def value(self): return self._value + def set_value(self, value: Any): + self._value = value + 
def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: @@ -96,7 +99,7 @@ def to_lark(self) -> Tree: def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() self._children = children - self._meta = meta + self._meta = meta or Meta() for index, child in enumerate(children): if child is not None: diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 5c8468d4..c879b772 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -5,10 +5,10 @@ from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ +from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext @@ -16,9 +16,9 @@ class AttributeRule(LarkRule): _children: Tuple[ - NAME, + IdentifierRule, EQ, - ExpressionRule, + ExprTermRule, ] @staticmethod @@ -26,11 +26,11 @@ def lark_name() -> str: return "attribute" @property - def identifier(self) -> NAME: + def identifier(self) -> IdentifierRule: return self._children[0] @property - def expression(self) -> ExpressionRule: + def expression(self) -> ExprTermRule: return self._children[2] def serialize( @@ -56,40 +56,32 @@ def lark_name() -> str: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] + attribute_names = set() comments = [] inline_comments = [] + result = 
defaultdict(list) + for child in self._children: if isinstance(child, BlockRule): - blocks.append(child) + name = child.labels[0].serialize(options) + if name in attribute_names: + raise RuntimeError(f"Attribute {name} is already defined.") + result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attributes.append(child) - # collect in-line comments from attribute assignments, expressions etc - inline_comments.extend(child.expression.inline_comments()) + attribute_names.add(child) + result.update(child.serialize(options)) + if options.with_comments: + # collect in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) - if isinstance(child, NewLineOrCommentRule): + if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() if child_comments: comments.extend(child_comments) - result = {} - - for attribute in attributes: - result.update(attribute.serialize(options)) - - result_blocks = defaultdict(list) - for block in blocks: - name = block.labels[0].serialize(options) - if name in result.keys(): - raise RuntimeError(f"Attribute {name} is already defined.") - result_blocks[name].append(block.serialize(options)) - - result.update(**result_blocks) - if options.with_comments: if comments: result["__comments__"] = comments @@ -122,7 +114,9 @@ class BlockRule(LarkRule): _children: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], + LBRACE, BodyRule, + RBRACE, ] def __init__(self, children, meta: Optional[Meta] = None): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index 11ac0f5e..b82abc58 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -14,13 +14,22 @@ EQ, LBRACE, COMMA, - RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, + RBRACE, + LSQB, + RSQB, + LPAR, + RPAR, + DOT, ) from hcl2.rule_transformer.rules.whitespace import 
( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class TupleRule(InlineCommentMixIn): @@ -33,7 +42,7 @@ class TupleRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], COMMA, Optional[NewLineOrCommentRule], - ... + # ... ], ExpressionRule, Optional[NewLineOrCommentRule], @@ -52,14 +61,18 @@ def elements(self) -> List[ExpressionRule]: child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_tuples: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_tuples and not context.inside_dollar_string: return [element.serialize(options, context) for element in self.elements] with context.modify(inside_dollar_string=True): - result = f"[{", ".join( + result = "[" + result += ", ".join( str(element.serialize(options, context)) for element in self.elements - )}]" + ) + result += "]" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -81,7 +94,9 @@ def lark_name() -> str: def value(self) -> key_T: return self._children[0] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.value.serialize(options, context) @@ -93,7 +108,6 @@ class ObjectElemKeyExpressionRule(LarkRule): RPAR, ] - @staticmethod def lark_name() -> str: return "object_elem_key_expression" @@ -102,7 +116,9 @@ def lark_name() -> str: def expression(self) -> ExpressionRule: return self._children[1] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + def serialize( + self, 
options=SerializationOptions(), context=SerializationContext() + ) -> Any: with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -117,8 +133,7 @@ class ObjectElemKeyDotAccessor(LarkRule): Tuple[ IdentifierRule, DOT, - ... - ] + ], ] @staticmethod @@ -129,8 +144,12 @@ def lark_name() -> str: def identifiers(self) -> List[IdentifierRule]: return [child for child in self._children if isinstance(child, IdentifierRule)] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: - return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".".join( + identifier.serialize(options, context) for identifier in self.identifiers + ) class ObjectElemRule(LarkRule): @@ -153,9 +172,13 @@ def key(self) -> ObjectElemKeyRule: def expression(self): return self._children[2] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return { - self.key.serialize(options, context): self.expression.serialize(options, context) + self.key.serialize(options, context): self.expression.serialize( + options, context + ) } @@ -169,7 +192,6 @@ class ObjectRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], Optional[COMMA], Optional[NewLineOrCommentRule], - ... 
], RBRACE, ] @@ -184,8 +206,10 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_objects: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_objects and not context.inside_dollar_string: result = {} for element in self.elements: result.update(element.serialize(options, context)) @@ -193,12 +217,13 @@ def serialize(self, options = SerializationOptions(), context = SerializationCon return result with context.modify(inside_dollar_string=True): - result = f"{{{", ".join( + result = "{" + result += ", ".join( f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" for element in self.elements - )}}}" + ) + result += "}" if not context.inside_dollar_string: result = to_dollar_string(result) - return result diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index d89f3b3c..0e0c9be8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -16,7 +16,6 @@ from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, - unwrap_dollar_string, SerializationOptions, SerializationContext, ) @@ -58,7 +57,7 @@ def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) @property @@ -100,7 +99,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [2, 4, 6]) + self._insert_optionals(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -118,7 +117,7 @@ def 
if_false(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - with context.modify(inside_dollar_string=False): + with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " f"? {self.if_true.serialize(options, context)} " @@ -144,7 +143,7 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py index 18abe6c8..3a89aba3 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -52,18 +52,23 @@ def lark_name() -> str: return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): - # Insert null comments at positions where they might be missing - self._possibly_insert_null_second_identifier(children) - self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): - second_identifier_present = ( - len([child for child in children if isinstance(child, IdentifierRule)]) == 2 - ) - if not second_identifier_present: - children.insert(3, None) - children.insert(4, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + identifiers = [child for child in children if isinstance(child, IdentifierRule)] + second_identifier = identifiers[1] if len(identifiers) == 2 else None + + indexes = [1, 5, 7, 9, 11] + if second_identifier is None: + indexes.extend([3, 4]) + + super()._insert_optionals(children, sorted(indexes)) + + if second_identifier is not None: + children[3] = COMMA() + children[4] = second_identifier @property def 
first_iterator(self) -> IdentifierRule: @@ -90,7 +95,6 @@ def serialize( result += f", {self.second_iterator.serialize(options, context)}" result += f" in {self.iterable.serialize(options, context)} : " - return result @@ -108,7 +112,7 @@ def lark_name() -> str: return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property @@ -142,13 +146,25 @@ def lark_name() -> str: return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 5, 7]) - self._possibly_insert_null_condition(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_condition(self, children: List[LarkElement]): - if not len([child for child in children if isinstance(child, ForCondRule)]): - children.insert(6, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + condition = None + + for child in children: + if isinstance(child, ForCondRule): + condition = child + break + + indexes = [1, 3, 5, 7] + + if condition is None: + indexes.append(6) + + super()._insert_optionals(children, sorted(indexes)) + + children[6] = condition @property def for_intro(self) -> ForIntroRule: @@ -209,30 +225,30 @@ def lark_name() -> str: return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) - self._possibly_insert_null_optionals(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_optionals(self, children: List[LarkElement]): - has_ellipsis = False - has_condition = False + def _insert_optionals(self, children: List, indexes: List[int] = None): + ellipsis_ = None + condition = None for child in children: - # if not has_ellipsis and isinstance(child, ELLIPSIS): - if ( - 
has_ellipsis is False - and child is not None - and child.lark_name() == ELLIPSIS.lark_name() - ): - has_ellipsis = True - if not has_condition and isinstance(child, ForCondRule): - has_condition = True - - if not has_ellipsis: - children.insert(9, None) - - if not has_condition: - children.insert(11, None) + if ellipsis_ is None and isinstance(child, ELLIPSIS): + ellipsis_ = child + if condition is None and isinstance(child, ForCondRule): + condition = child + + indexes = [1, 3, 6, 8, 10, 12] + + if ellipsis_ is None: + indexes.append(9) + if condition is None: + indexes.append(11) + + super()._insert_optionals(children, sorted(indexes)) + + children[9] = ellipsis_ + children[11] = condition @property def for_intro(self) -> ForIntroRule: @@ -262,6 +278,7 @@ def condition(self) -> Optional[ForCondRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) @@ -270,7 +287,6 @@ def serialize( result += self.value_expr.serialize( SerializationOptions(wrap_objects=True), context ) - if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index b25fed62..9e52a47b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -4,8 +4,15 @@ from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from 
hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class ArgumentsRule(InlineCommentMixIn): @@ -17,7 +24,7 @@ class ArgumentsRule(InlineCommentMixIn): COMMA, Optional[NewLineOrCommentRule], ExpressionRule, - ... + # ... ], Optional[Union[COMMA, ELLIPSIS]], Optional[NewLineOrCommentRule], @@ -39,8 +46,12 @@ def has_ellipsis(self) -> bool: def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = ", ".join( + [str(argument.serialize(options, context)) for argument in self.arguments] + ) if self.has_ellipsis: result += " ..." return result @@ -75,30 +86,32 @@ def arguments(self) -> Optional[ArgumentsRule]: if isinstance(child, ArgumentsRule): return child - - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ( - f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" - f"({self.arguments.serialize(options, context) if self.arguments else ""})" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += ( + f"({self.arguments.serialize(options, context) if self.arguments else ''})" ) + if not context.inside_dollar_string: result = to_dollar_string(result) return result -# class ProviderFunctionCallRule(FunctionCallRule): -# _children: Tuple[ -# IdentifierRule, -# IdentifierRule, -# IdentifierRule, -# LPAR, -# Optional[NewLineOrCommentRule], -# Optional[ArgumentsRule], -# Optional[NewLineOrCommentRule], -# 
RPAR, -# ] -# -# @staticmethod -# def lark_name() -> str: -# return "provider_function_call" +class ProviderFunctionCallRule(FunctionCallRule): + _children: Tuple[ + IdentifierRule, + IdentifierRule, + IdentifierRule, + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index 7a9b53a5..20decf00 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -67,7 +67,7 @@ def serialize( return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 67d53fcf..ba948d3e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -37,6 +37,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: class StaticStringToken(LarkToken): + + classes_by_value = {} + @classmethod @lru_cache(maxsize=None) def __build_subclass( @@ -44,7 +47,7 @@ def __build_subclass( ) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" - return type( # type: ignore + result = type( # type: ignore f"{name}_TOKEN", (cls,), { @@ -53,6 +56,8 @@ def __build_subclass( "_default_value": default_value, }, ) + cls.classes_by_value[default_value] = result + return result def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: name, default_value = value @@ -72,8 +77,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] 
-HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # static values EQ = StaticStringToken[("EQ", "=")] COLON = StaticStringToken[("COLON", ":")] diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index fa24355c..62069b78 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -3,7 +3,7 @@ from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): @@ -15,6 +15,11 @@ def lark_name() -> str: def from_string(cls, string: str) -> "NewLineOrCommentRule": return cls([LarkToken("NL_OR_COMMENT", string)]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.token.serialize() + def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: @@ -43,7 +48,7 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + def _insert_optionals(self, children: List, indexes: List[int] = None): for index in indexes: try: child = children[index] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1ab1dfda..931eab8e 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -60,6 +60,7 @@ IntLiteral, FloatLiteral, StringToken, + StaticStringToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -80,6 +81,8 @@ def __init__(self, 
discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable + if token.value in StaticStringToken.classes_by_value.keys(): + return StaticStringToken.classes_by_value[token.value]() return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 98370ca3..8f1d7352 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -7,7 +7,6 @@ HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - @dataclass class SerializationOptions: with_comments: bool = True @@ -18,11 +17,6 @@ class SerializationOptions: preserve_heredocs: bool = True -@dataclass -class DeserializationOptions: - heredocs_to_strings: bool = False - - @dataclass class SerializationContext: inside_dollar_string: bool = False From 5ccfa657f28f152ea338c03d36508e365046c6f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 12 Dec 2025 14:09:37 +0100 Subject: [PATCH 11/24] * HCLReconstructor._reconstruct_token - handle 0 length tokens * FunctionCallRule.serialize - properly serialize into dollar string * remove unused import --- hcl2/rule_transformer/reconstructor.py | 3 ++- hcl2/rule_transformer/rules/containers.py | 1 - hcl2/rule_transformer/rules/functions.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 7d316b2c..6aa8a4a4 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -167,7 +167,8 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: result = " " + result self._last_token_name = token.type - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + if len(token) != 0: + self._last_was_space = result[-1].endswith(" 
") or result[-1].endswith("\n") return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index b82abc58..a2f53436 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,4 +1,3 @@ -import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 9e52a47b..92cc8b11 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -89,10 +89,9 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += ( - f"({self.arguments.serialize(options, context) if self.arguments else ''})" - ) + with context.modify(inside_dollar_string=True): + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" if not context.inside_dollar_string: result = to_dollar_string(result) From ca192325cc03a72618773cf31199b53c27e24774 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 21 Feb 2026 14:33:09 +0100 Subject: [PATCH 12/24] fix operator precedence --- hcl2/rule_transformer/hcl2.lark | 60 +++++++++++++++++++--- hcl2/rule_transformer/reconstructor.py | 41 +++++++++++++-- hcl2/rule_transformer/rules/expressions.py | 55 +++++++++++++++++--- hcl2/rule_transformer/utils.py | 2 + 4 files changed, 138 insertions(+), 20 deletions(-) diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 24140ada..63154efb 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -24,7 +24,6 @@ FLOAT_LITERAL: (NEGATIVE_DECIMAL? 
DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EX | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) // Operators -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -99,16 +98,61 @@ string_part: STRING_CHARS | interpolation // Expressions -?expression : expr_term | operation | conditional +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional + | or_expr interpolation: INTERP_START expression RBRACE -conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -// Operations -?operation : unary_op | binary_op +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? 
add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator + +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term !unary_op : (MINUS | NOT) expr_term -binary_op : expression binary_term new_line_or_comment? -binary_term : binary_operator new_line_or_comment? expression -!binary_operator : BINARY_OP // Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 6aa8a4a4..099beead 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -6,12 +6,32 @@ from hcl2.rule_transformer.rules.for_expressions import ForIntroRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule +from hcl2.rule_transformer.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } + def __init__(self): self._reset_state() @@ -105,8 +125,14 @@ def _should_add_space_before( if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - # space around binary operators - if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): + return False + + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): return True elif isinstance(current_node, Tree): @@ -130,7 +156,14 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st result = [] rule_name = tree.data - if rule_name == ExprTermRule.lark_name(): + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): # Check if parenthesized if ( 
len(tree.children) >= 3 diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index 0e0c9be8..db256e82 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -26,8 +26,30 @@ class ExpressionRule(InlineCommentMixIn, ABC): def lark_name() -> str: return "expression" - def __init__(self, children, meta: Optional[Meta] = None): + def __init__( + self, children, meta: Optional[Meta] = None, parentheses: bool = False + ): super().__init__(children, meta) + self._parentheses = parentheses + + def _wrap_into_parentheses( + self, value: str, options=SerializationOptions(), context=SerializationContext() + ) -> str: + # do not wrap into parentheses if + # 1. already wrapped or + # 2. is top-level expression (unless explicitly wrapped) + if context.inside_parentheses: + return value + # Look through ExprTermRule wrapper to determine if truly nested + parent = getattr(self, "parent", None) + if parent is None: + return value + if isinstance(parent, ExprTermRule): + if not isinstance(parent.parent, ExpressionRule): + return value + elif not isinstance(parent, ExpressionRule): + return value + return wrap_into_parentheses(value) class ExprTermRule(ExpressionRule): @@ -47,18 +69,18 @@ def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): - self._parentheses = False + parentheses = False if ( isinstance(children[0], LarkToken) and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) and children[-1].lark_name() == "RPAR" ): - self._parentheses = True + parentheses = True else: children = [None, *children, None] self._insert_optionals(children, [1, 3]) - super().__init__(children, meta) + super().__init__(children, meta, parentheses) @property def parentheses(self) -> bool: @@ -71,7 +93,10 @@ def expression(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) 
-> Any: - result = self.expression.serialize(options, context) + with context.modify( + inside_parentheses=self.parentheses or context.inside_parentheses + ): + result = self.expression.serialize(options, context) if self.parentheses: result = wrap_into_parentheses(result) @@ -127,6 +152,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + return result @@ -192,6 +220,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) return result @@ -214,6 +245,14 @@ def expr_term(self): def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string( - f"{self.operator}{self.expr_term.serialize(options, context)}" - ) + + with context.modify(inside_dollar_string=True): + result = f"{self.operator}{self.expr_term.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + + return result diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8f1d7352..68c32ebc 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -15,11 +15,13 @@ class SerializationOptions: wrap_tuples: bool = False explicit_blocks: bool = True preserve_heredocs: bool = True + force_operation_parentheses: bool = False @dataclass class SerializationContext: inside_dollar_string: bool = False + inside_parentheses: bool = False def replace(self, **kwargs) -> "SerializationContext": return replace(self, **kwargs) From fc49bad9b819f5ce89ea5ed876880248c4f621b9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:20:06 +0100 Subject: [PATCH 13/24] 
reorganize new and old code --- hcl2/__init__.py | 1 - hcl2/api.py | 21 +- hcl2/{rule_transformer => }/deserializer.py | 20 +- hcl2/dict_transformer.py | 403 -------- hcl2/{rule_transformer => }/editor.py | 0 hcl2/{rule_transformer => }/formatter.py | 14 +- hcl2/hcl2.lark | 207 ++-- hcl2/parser.py | 24 +- hcl2/{rule_transformer => }/processor.py | 0 hcl2/py.typed | 0 hcl2/reconstructor.py | 913 ++++-------------- hcl2/rule_transformer/hcl2.lark | 207 ---- hcl2/rule_transformer/json.py | 12 - hcl2/rule_transformer/reconstructor.py | 238 ----- hcl2/rule_transformer/rules/__init__.py | 0 hcl2/{rule_transformer => rules}/__init__.py | 0 hcl2/{rule_transformer => }/rules/abstract.py | 2 +- hcl2/{rule_transformer => }/rules/base.py | 16 +- .../rules/containers.py | 14 +- .../rules/expressions.py | 10 +- .../rules/for_expressions.py | 12 +- .../{rule_transformer => }/rules/functions.py | 10 +- hcl2/{rule_transformer => }/rules/indexing.py | 12 +- .../rules/literal_rules.py | 4 +- hcl2/{rule_transformer => }/rules/strings.py | 8 +- hcl2/{rule_transformer => }/rules/tokens.py | 2 +- hcl2/{rule_transformer => }/rules/tree.py | 0 .../rules/whitespace.py | 6 +- hcl2/{rule_transformer => }/transformer.py | 20 +- hcl2/{rule_transformer => }/utils.py | 0 30 files changed, 442 insertions(+), 1734 deletions(-) rename hcl2/{rule_transformer => }/deserializer.py (94%) delete mode 100644 hcl2/dict_transformer.py rename hcl2/{rule_transformer => }/editor.py (100%) rename hcl2/{rule_transformer => }/formatter.py (94%) rename hcl2/{rule_transformer => }/processor.py (100%) delete mode 100644 hcl2/py.typed delete mode 100644 hcl2/rule_transformer/hcl2.lark delete mode 100644 hcl2/rule_transformer/json.py delete mode 100644 hcl2/rule_transformer/reconstructor.py delete mode 100644 hcl2/rule_transformer/rules/__init__.py rename hcl2/{rule_transformer => rules}/__init__.py (100%) rename hcl2/{rule_transformer => }/rules/abstract.py (97%) rename hcl2/{rule_transformer => }/rules/base.py (88%) 
rename hcl2/{rule_transformer => }/rules/containers.py (93%) rename hcl2/{rule_transformer => }/rules/expressions.py (95%) rename hcl2/{rule_transformer => }/rules/for_expressions.py (95%) rename hcl2/{rule_transformer => }/rules/functions.py (90%) rename hcl2/{rule_transformer => }/rules/indexing.py (94%) rename hcl2/{rule_transformer => }/rules/literal_rules.py (85%) rename hcl2/{rule_transformer => }/rules/strings.py (94%) rename hcl2/{rule_transformer => }/rules/tokens.py (98%) rename hcl2/{rule_transformer => }/rules/tree.py (100%) rename hcl2/{rule_transformer => }/rules/whitespace.py (90%) rename hcl2/{rule_transformer => }/transformer.py (93%) rename hcl2/{rule_transformer => }/utils.py (100%) diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 62f5a198..2d5dad09 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -11,7 +11,6 @@ parse, parses, transform, - reverse_transform, writes, ) diff --git a/hcl2/api.py b/hcl2/api.py index 1cec02a2..7c384c53 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -2,9 +2,9 @@ from typing import TextIO from lark.tree import Tree -from hcl2.parser import parser, reconstruction_parser -from hcl2.dict_transformer import DictTransformer -from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer +from hcl2.parser import parser +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer def load(file: TextIO, with_meta=False) -> dict: @@ -27,7 +27,7 @@ def loads(text: str, with_meta=False) -> dict: # This means that all blocks must end in a new line even if the file ends # Append a new line as a temporary fix tree = parser().parse(text + "\n") - return DictTransformer(with_meta=with_meta).transform(tree) + return RuleTransformer().transform(tree) def parse(file: TextIO) -> Tree: @@ -41,7 +41,7 @@ def parses(text: str) -> Tree: """Load HCL2 syntax tree from a string. :param text: Text with hcl2 to be loaded as a dict. 
""" - return reconstruction_parser().parse(text) + return parser().parse(text) def transform(ast: Tree, with_meta=False) -> dict: @@ -50,18 +50,11 @@ def transform(ast: Tree, with_meta=False) -> dict: :param with_meta: If set to true then adds `__start_line__` and `__end_line__` parameters to the output dict. Default to false. """ - return DictTransformer(with_meta=with_meta).transform(ast) - - -def reverse_transform(hcl2_dict: dict) -> Tree: - """Convert a dictionary to an HCL2 AST. - :param hcl2_dict: a dictionary produced by `load` or `transform` - """ - return HCLReverseTransformer().transform(hcl2_dict) + return RuleTransformer().transform(ast) def writes(ast: Tree) -> str: """Convert an HCL2 syntax tree to a string. :param ast: HCL2 syntax tree, output from `parse` or `parses` """ - return HCLReconstructor(reconstruction_parser()).reconstruct(ast) + return HCLReconstructor().reconstruct(ast) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/deserializer.py similarity index 94% rename from hcl2/rule_transformer/deserializer.py rename to hcl2/deserializer.py index 56e1ad44..2290809c 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/deserializer.py @@ -8,14 +8,14 @@ from hcl2 import parses from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement, LarkRule +from hcl2.rules.base import ( BlockRule, AttributeRule, BodyRule, StartRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( TupleRule, ObjectRule, ObjectElemRule, @@ -23,20 +23,20 @@ ObjectElemKeyDotAccessor, ObjectElemKeyRule, ) -from hcl2.rule_transformer.rules.expressions import ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import ( IdentifierRule, IntLitRule, FloatLitRule, ) -from 
hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( StringRule, InterpolationRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, EQ, DBLQUOTE, @@ -55,9 +55,9 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.transformer import RuleTransformer +from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @dataclass diff --git a/hcl2/dict_transformer.py b/hcl2/dict_transformer.py deleted file mode 100644 index 64c58bcb..00000000 --- a/hcl2/dict_transformer.py +++ /dev/null @@ -1,403 +0,0 @@ -"""A Lark Transformer for transforming a Lark parse tree into a Python dict""" -import json -import re -import sys -from collections import namedtuple -from typing import List, Dict, Any - -from lark import Token -from lark.tree import Meta -from lark.visitors import Transformer, Discard, _DiscardType, v_args - -from .reconstructor import reverse_quotes_within_interpolation - - -HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) -HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - - -START_LINE = "__start_line__" -END_LINE = "__end_line__" - - -Attribute = namedtuple("Attribute", ("key", "value")) - - -# pylint: disable=missing-function-docstring,unused-argument -class DictTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a dict. 
- """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self.with_meta = with_meta - super().__init__() - - def float_lit(self, args: List) -> float: - value = "".join([self.to_tf_inline(arg) for arg in args]) - if "e" in value: - return self.to_string_dollar(value) - return float(value) - - def int_lit(self, args: List) -> int: - return int("".join([self.to_tf_inline(arg) for arg in args])) - - def expr_term(self, args: List) -> Any: - args = self.strip_new_line_tokens(args) - - if args[0] == "true": - return True - if args[0] == "false": - return False - if args[0] == "null": - return None - - if args[0] == "(" and args[-1] == ")": - return "".join(str(arg) for arg in args) - - return args[0] - - def index_expr_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"{args[0]}{args[1]}" - - def index(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"[{args[0]}]" - - def get_attr_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def get_attr(self, args: List) -> str: - return f".{args[0]}" - - def attr_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def attr_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f".*{args_str}" - - def full_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def full_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f"[*]{args_str}" - - def tuple(self, args: List) -> List: - return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)] - - def object_elem(self, args: List) -> Dict: - # This returns a dict with a single 
key/value pair to make it easier to merge these - # into a bigger dict that is returned by the "object" function - - key = str(args[0].children[0]) - if not re.match(r".*?(\${).*}.*", key): - # do not strip quotes of a interpolation string - key = self.strip_quotes(key) - - value = self.to_string_dollar(args[2]) - return {key: value} - - def object_elem_key_dot_accessor(self, args: List) -> str: - return "".join(args) - - def object_elem_key_expression(self, args: List) -> str: - return self.to_string_dollar("".join(args)) - - def object(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - result: Dict[str, Any] = {} - for arg in args: - if ( - isinstance(arg, Token) and arg.type == "COMMA" - ): # skip optional comma at the end of object element - continue - - result.update(arg) - return result - - def function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 1: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[1] if arg is not Discard] - ) - return f"{args[0]}({args_str})" - - def provider_function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 5: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[5] if arg is not Discard] - ) - provider_func = "::".join([args[0], args[2], args[4]]) - return f"{provider_func}({args_str})" - - def arguments(self, args: List) -> List: - return self.process_nulls(args) - - @v_args(meta=True) - def block(self, meta: Meta, args: List) -> Dict: - *block_labels, block_body = args - result: Dict[str, Any] = block_body - if self.with_meta: - result.update( - { - START_LINE: meta.line, - END_LINE: meta.end_line, - } - ) - - # create nested dict. i.e. 
{label1: {label2: {labelN: result}}} - for label in reversed(block_labels): - label_str = self.strip_quotes(label) - result = {label_str: result} - - return result - - def attribute(self, args: List) -> Attribute: - key = str(args[0]) - if key.startswith('"') and key.endswith('"'): - key = key[1:-1] - value = self.to_string_dollar(args[2]) - return Attribute(key, value) - - def conditional(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return f"{args[0]} ? {args[1]} : {args[2]}" - - def binary_op(self, args: List) -> str: - return " ".join( - [self.unwrap_string_dollar(self.to_tf_inline(arg)) for arg in args] - ) - - def unary_op(self, args: List) -> str: - args = self.process_nulls(args) - return "".join([self.to_tf_inline(arg) for arg in args]) - - def binary_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def body(self, args: List) -> Dict[str, List]: - # See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies - # --- - # A body is a collection of associated attributes and blocks. - # - # An attribute definition assigns a value to a particular attribute - # name within a body. Each distinct attribute name may be defined no - # more than once within a single body. - # - # A block creates a child body that is annotated with a block type and - # zero or more block labels. Blocks create a structural hierarchy which - # can be interpreted by the calling application. - # --- - # - # There can be more than one child body with the same block type and - # labels. This means that all blocks (even when there is only one) - # should be transformed into lists of blocks. 
- args = self.strip_new_line_tokens(args) - attributes = set() - result: Dict[str, Any] = {} - for arg in args: - if isinstance(arg, Attribute): - if arg.key in result: - raise RuntimeError(f"{arg.key} already defined") - result[arg.key] = arg.value - attributes.add(arg.key) - else: - # This is a block. - for key, value in arg.items(): - key = str(key) - if key in result: - if key in attributes: - raise RuntimeError(f"{key} already defined") - result[key].append(value) - else: - result[key] = [value] - - return result - - def start(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - return args[0] - - def binary_operator(self, args: List) -> str: - return str(args[0]) - - def heredoc_template(self, args: List) -> str: - match = HEREDOC_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - result = match.group(2).rstrip(trim_chars) - return f'"{result}"' - - def heredoc_template_trim(self, args: List) -> str: - # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions - # This is a special version of heredocs that are declared with "<<-" - # This will calculate the minimum number of leading spaces in each line of a heredoc - # and then remove that number of spaces from each line - match = HEREDOC_TRIM_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - text = match.group(2).rstrip(trim_chars) - lines = text.split("\n") - - # calculate the min number of leading spaces in each line - min_spaces = sys.maxsize - for line in lines: - leading_spaces = len(line) - len(line.lstrip(" ")) - min_spaces = min(min_spaces, leading_spaces) - - # trim off that number of leading spaces from each line - lines = [line[min_spaces:] for line in lines] - - return '"%s"' % "\n".join(lines) - - def new_line_or_comment(self, args: List) -> _DiscardType: - return Discard - - # def 
EQ(self, args: List): - # print("EQ", args) - # return args - - def for_tuple_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - return f"[{for_expr}]" - - def for_intro(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_cond(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_object_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - # doubled curly braces stands for inlining the braces - # and the third pair of braces is for the interpolation - # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" - return f"{{{for_expr}}}" - - def string(self, args: List) -> str: - return '"' + "".join(args) + '"' - - def string_part(self, args: List) -> str: - value = self.to_tf_inline(args[0]) - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - def interpolation(self, args: List) -> str: - return '"${' + str(args[0]) + '}"' - - def strip_new_line_tokens(self, args: List) -> List: - """ - Remove new line and Discard tokens. 
- The parser will sometimes include these in the tree so we need to strip them out here - """ - return [arg for arg in args if arg != "\n" and arg is not Discard] - - def is_string_dollar(self, value: str) -> bool: - if not isinstance(value, str): - return False - return value.startswith("${") and value.endswith("}") - - def to_string_dollar(self, value: Any) -> Any: - """Wrap a string in ${ and }""" - if not isinstance(value, str): - return value - # if it's already wrapped, pass it unmodified - if self.is_string_dollar(value): - return value - - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - - if self.is_type_keyword(value): - return value - - return f"${{{value}}}" - - def unwrap_string_dollar(self, value: str): - if self.is_string_dollar(value): - return value[2:-1] - return value - - def strip_quotes(self, value: Any) -> Any: - """Remove quote characters from the start and end of a string""" - if isinstance(value, str): - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - return value - - def process_escape_sequences(self, value: str) -> str: - """Process HCL escape sequences within quoted template expressions.""" - if isinstance(value, str): - # normal escape sequences - value = value.replace("\\n", "\n") - value = value.replace("\\r", "\r") - value = value.replace("\\t", "\t") - value = value.replace('\\"', '"') - value = value.replace("\\\\", "\\") - - # we will leave Unicode escapes (\uNNNN and \UNNNNNNNN) untouched - # for now, but this method can be extended in the future - return value - - def process_nulls(self, args: List) -> List: - return ["null" if arg is None else arg for arg in args] - - def to_tf_inline(self, value: Any) -> str: - """ - Converts complex objects (e.g.) 
dicts to an "inline" HCL syntax - for use in function calls and ${interpolation} strings - """ - if isinstance(value, dict): - dict_v = json.dumps(value) - return reverse_quotes_within_interpolation(dict_v) - if isinstance(value, list): - value = [self.to_tf_inline(item) for item in value] - return f"[{', '.join(value)}]" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, str): - return value - if isinstance(value, (int, float)): - return str(value) - if value is None: - return "None" - - raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}") - - def identifier(self, value: Any) -> Any: - # Making identifier a token by capitalizing it to IDENTIFIER - # seems to return a token object instead of the str - # So treat it like a regular rule - # In this case we just convert the whole thing to a string - return str(value[0]) diff --git a/hcl2/rule_transformer/editor.py b/hcl2/editor.py similarity index 100% rename from hcl2/rule_transformer/editor.py rename to hcl2/editor.py diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/formatter.py similarity index 94% rename from hcl2/rule_transformer/formatter.py rename to hcl2/formatter.py index ad0247dc..205d2ddd 100644 --- a/hcl2/rule_transformer/formatter.py +++ b/hcl2/formatter.py @@ -2,21 +2,21 @@ from dataclasses import dataclass from typing import List -from hcl2.rule_transformer.rules.abstract import LarkElement -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement +from hcl2.rules.base import ( StartRule, BlockRule, AttributeRule, BodyRule, ) -from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from 
hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ) -from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rules.whitespace import NewLineOrCommentRule @dataclass diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 78ba3ca6..63154efb 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,27 +1,29 @@ -start : body -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? "{" body "}" -new_line_or_comment: ( NL_OR_COMMENT )+ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ -identifier : NAME | IN | FOR | IF | FOR_EACH -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +// Keywords IF : "if" IN : "in" FOR : "for" FOR_EACH : "for_each" -?expression : expr_term | operation | conditional -conditional : expression "?" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? expression +// Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) -?operation : unary_op | binary_op -!unary_op : ("-" | "!") expr_term -binary_op : expression binary_term new_line_or_comment? 
-!binary_operator : BINARY_OP -binary_term : binary_operator new_line_or_comment? expression -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +// Operators DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -35,74 +37,171 @@ PERCENT : "%" DOUBLE_AMP : "&&" DOUBLE_PIPE : "||" PLUS : "+" +NOT : "!" +QMARK : "?" + +// Punctuation LPAR : "(" RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" COMMA : "," DOT : "." +EQ : /[ \t]*=(?!=|>)/ COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? 
expression -> conditional + | or_expr +interpolation: INTERP_START expression RBRACE + +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? 
unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term +!unary_op : (MINUS | NOT) expr_term + +// Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr -string: "\"" string_part* "\"" -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation -interpolation: "${" expression "}" -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ - -int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+ -!float_lit: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) ("." DECIMAL+)? (EXP_MARK) -NEGATIVE_DECIMAL : "-" DECIMAL -DECIMAL : "0".."9" -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -EQ : /[ \t]*=(?!=|>)/ - -tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]" -object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}" +// Collections +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR object_elem_key_dot_accessor : identifier (DOT identifier)+ -heredoc_template : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -heredoc_template_trim : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM -function_call : identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" -arguments : (expression (new_line_or_comment* "," new_line_or_comment* expression)* ("," | "...")? new_line_or_comment*) -colons: "::" -provider_function_call: identifier colons identifier colons identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" +// Functions +function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) +// Indexing and attribute access index_expr_term : expr_term index get_attr_expr_term : expr_term get_attr attr_splat_expr_term : expr_term attr_splat full_splat_expr_term : expr_term full_splat -index : "[" new_line_or_comment? expression new_line_or_comment? "]" | "." DECIMAL+ -get_attr : "." identifier -attr_splat : ".*" get_attr* -full_splat : "[*]" (get_attr | index)* +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT (get_attr | index)* +full_splat : FULL_SPLAT_START (get_attr | index)* -FOR_OBJECT_ARROW : "=>" -!for_tuple_expr : "[" new_line_or_comment? for_intro new_line_or_comment? 
expression new_line_or_comment? for_cond? new_line_or_comment? "]" -!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" -!for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? -!for_cond : "if" new_line_or_comment? expression - -%ignore /[ \t]+/ +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/parser.py b/hcl2/parser.py index 3e524736..a33fe5f8 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,31 +12,9 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark", + "hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, propagate_positions=True, ) - - -@functools.lru_cache() -def reconstruction_parser() -> Lark: - """ - Build parser for transforming python structures into HCL2 text. - This is duplicated from `parser` because we need different options here for - the reconstructor. Please make sure changes are kept in sync between the two - if necessary. 
- """ - return Lark.open( - "rule_transformer/hcl2.lark", - parser="lalr", - # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: - # - # https://github.com/lark-parser/lark/issues/1472 - # - # cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar - rel_to=__file__, - propagate_positions=True, - maybe_placeholders=False, # Needed for reconstruction - ) diff --git a/hcl2/rule_transformer/processor.py b/hcl2/processor.py similarity index 100% rename from hcl2/rule_transformer/processor.py rename to hcl2/processor.py diff --git a/hcl2/py.typed b/hcl2/py.typed deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 555edcf6..e92f7040 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,739 +1,238 @@ -"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" - -import re -from typing import List, Dict, Callable, Optional, Union, Any, Tuple - -from lark import Lark, Tree -from lark.grammar import Terminal, Symbol -from lark.lexer import Token, PatternStr, TerminalDef -from lark.reconstruct import Reconstructor -from lark.tree_matcher import is_discarded_terminal -from lark.visitors import Transformer_InPlace -from regex import regex - -from hcl2.const import START_LINE_KEY, END_LINE_KEY -from hcl2.parser import reconstruction_parser - - -# function to remove the backslashes within interpolated portions -def reverse_quotes_within_interpolation(interp_s: str) -> str: - """ - A common operation is to `json.dumps(s)` where s is a string to output in - HCL. This is useful for automatically escaping any quotes within the - string, but this escapes quotes within interpolation incorrectly. This - method removes any erroneous escapes within interpolated segments of a - string. 
- """ - return re.sub(r"\$\{(.*)}", lambda m: m.group(0).replace('\\"', '"'), interp_s) - - -class WriteTokensAndMetaTransformer(Transformer_InPlace): - """ - Inserts discarded tokens into their correct place, according to the rules - of grammar, and annotates with metadata during reassembly. The metadata - tracked here include the terminal which generated a particular string - output, and the rule that that terminal was matched on. - - This is a modification of lark.reconstruct.WriteTokensTransformer - """ - - tokens: Dict[str, TerminalDef] - term_subs: Dict[str, Callable[[Symbol], str]] - - def __init__( - self, - tokens: Dict[str, TerminalDef], - term_subs: Dict[str, Callable[[Symbol], str]], - ) -> None: - super().__init__() - self.tokens = tokens - self.term_subs = term_subs - - def __default__(self, data, children, meta): - """ - This method is called for every token the transformer visits. - """ - - if not getattr(meta, "match_tree", False): - return Tree(data, children) - iter_args = iter( - [child[2] if isinstance(child, tuple) else child for child in children] - ) - to_write = [] - for sym in meta.orig_expansion: - if is_discarded_terminal(sym): - try: - value = self.term_subs[sym.name](sym) - except KeyError as exc: - token = self.tokens[sym.name] - if not isinstance(token.pattern, PatternStr): - raise NotImplementedError( - f"Reconstructing regexps not supported yet: {token}" - ) from exc - - value = token.pattern.value - - # annotate the leaf with the specific rule (data) and terminal - # (sym) it was generated from - to_write.append((data, sym, value)) - else: - item = next(iter_args) - if isinstance(item, list): - to_write += item - else: - if isinstance(item, Token): - # annotate the leaf with the specific rule (data) and - # terminal (sym) it was generated from - to_write.append((data, sym, item)) - else: - to_write.append(item) - - return to_write - - -class HCLReconstructor(Reconstructor): +from typing import List, Union + +from lark import Tree, 
Token +from hcl2.rules import tokens +from hcl2.rules.base import BlockRule +from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) + + +class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - def __init__( - self, - parser: Lark, - term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None, - ): - Reconstructor.__init__(self, parser, term_subs) - - self.write_tokens: WriteTokensAndMetaTransformer = ( - WriteTokensAndMetaTransformer( - {token.name: token for token in self.tokens}, term_subs or {} - ) - ) - - # these variables track state during reconstruction to enable us to make - # informed decisions about formatting output. They are primarily used - # by the _should_add_space(...) method. - self._last_char_space = True - self._last_terminal: Union[Terminal, None] = None - self._last_rule: Union[Tree, Token, None] = None - self._deferred_item = None - - def should_be_wrapped_in_spaces(self, terminal: Terminal) -> bool: - """Whether given terminal should be wrapped in spaces""" - return terminal.name in { - "IF", - "IN", - "FOR", - "FOR_EACH", - "FOR_OBJECT_ARROW", - "COLON", - "QMARK", - "BINARY_OP", - } - - def _is_equals_sign(self, terminal) -> bool: - return ( - isinstance(self._last_rule, Token) - and self._last_rule.value in ("attribute", "object_elem") - and self._last_terminal == Terminal("EQ") - and terminal != Terminal("NL_OR_COMMENT") - ) - - # pylint: disable=too-many-branches, too-many-return-statements - def _should_add_space(self, rule, current_terminal, is_block_label: bool = False): - """ - This method documents the situations in which we add space around - certain tokens while reconstructing the generated HCL. 
- - Additional rules can be added here if the generated HCL has - improper whitespace (affecting parse OR affecting ability to perfectly - reconstruct a file down to the whitespace level.) - - It has the following information available to make its decision: - - - the last token (terminal) we output - - the last rule that token belonged to - - the current token (terminal) we're about to output - - the rule the current token belongs to - - This should be sufficient to make a spacing decision. - """ - - # we don't need to add multiple spaces - if self._last_char_space: - return False + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } - # we don't add a space at the start of the file - if not self._last_terminal or not self._last_rule: + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: return False - if self._is_equals_sign(current_terminal): - return True + # Don't add space at the beginning + if self._last_token_name is None: + return False - if is_block_label: - pass - # print(rule, self._last_rule, current_terminal, self._last_terminal) + if isinstance(current_node, Token): + token_type = current_node.type - if is_block_label and isinstance(rule, Token) and rule.value == "string": + # Space before '{' in blocks if ( - current_terminal == self._last_terminal == Terminal("DBLQUOTE") - or current_terminal == 
Terminal("DBLQUOTE") - and self._last_terminal == Terminal("IDENTIFIER") + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() ): - # print("true") return True - # if we're in a ternary or binary operator, add space around the operator - if ( - isinstance(rule, Token) - and rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we just left a ternary or binary operator, add space around the - # operator unless there's a newline already - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a for or if statement and find a keyword, add a space - if ( - isinstance(rule, Token) - and rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we've just left a for or if statement and find a keyword, add a - # space, unless we have a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a block - if (isinstance(rule, Token) and rule.value == "block") or ( - isinstance(rule, str) and re.match(r"^__block_(star|plus)_.*", rule) - ): - # always add space before the starting brace - if current_terminal == Terminal("LBRACE"): + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): return True - # always 
add space before the closing brace - if current_terminal == Terminal( - "RBRACE" - ) and self._last_terminal != Terminal("LBRACE"): + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + return True - # always add space between string literals - if current_terminal == Terminal("STRING_CHARS"): + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... except for last comma + if token_type == tokens.RSQB.lark_name(): + return False return True - # if we just opened a block, add a space, unless the block is empty - # or has a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value == "block" - and self._last_terminal == Terminal("LBRACE") - and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")] - ): - return True - - # if we're in a tuple or function arguments (this rule matches commas between items) - if isinstance(self._last_rule, str) and re.match( - r"^__(tuple|arguments)_(star|plus)_.*", self._last_rule - ): - - # string literals, decimals, and identifiers should always be - # preceded by a space if they're following a comma in a tuple or - # function arg - if current_terminal in [ - Terminal("DBLQUOTE"), - Terminal("DECIMAL"), - Terminal("NAME"), - Terminal("NEGATIVE_DECIMAL"), + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), ]: return True - # the catch-all case, we're not sure, so don't add a space - return False - - def _reconstruct(self, tree, is_block_label=False): - unreduced_tree = self.match_tree(tree, tree.data) - res = self.write_tokens.transform(unreduced_tree) - for item in res: - # any time we encounter a child tree, we recurse - if isinstance(item, Tree): - yield from self._reconstruct( - item, (unreduced_tree.data == "block" and item.data != "body") - ) - - # every leaf should be a tuple, which 
contains information about - # which terminal the leaf represents - elif isinstance(item, tuple): - rule, terminal, value = item - - # first, handle any deferred items - if self._deferred_item is not None: - ( - deferred_rule, - deferred_terminal, - deferred_value, - ) = self._deferred_item - - # if we deferred a comma and the next character ends a - # parenthesis or block, we can throw it out - if deferred_terminal == Terminal("COMMA") and terminal in [ - Terminal("RPAR"), - Terminal("RBRACE"), - ]: - pass - # in any other case, we print the deferred item - else: - yield deferred_value - - # and do our bookkeeping - self._last_terminal = deferred_terminal - self._last_rule = deferred_rule - if deferred_value and not deferred_value[-1].isspace(): - self._last_char_space = False - - # clear the deferred item - self._deferred_item = None - - # potentially add a space before the next token - if self._should_add_space(rule, terminal, is_block_label): - yield " " - self._last_char_space = True - - # potentially defer the item if needed - if terminal in [Terminal("COMMA")]: - self._deferred_item = item - else: - # otherwise print the next token - yield value - - # and do our bookkeeping so we can make an informed - # decision about formatting next time - self._last_terminal = terminal - self._last_rule = rule - if value: - self._last_char_space = value[-1].isspace() - - else: - raise RuntimeError(f"Unknown bare token type: {item}") - - def reconstruct(self, tree, postproc=None, insert_spaces=False): - """Convert a Lark.Tree AST back into a string representation of HCL.""" - return Reconstructor.reconstruct( - self, - tree, - postproc, - insert_spaces, - ) - - -class HCLReverseTransformer: - """ - The reverse of hcl2.transformer.DictTransformer. This method attempts to - convert a dict back into a working AST, which can be written back out. 
- """ - - @staticmethod - def _name_to_identifier(name: str) -> Tree: - """Converts a string to a NAME token within an identifier rule.""" - return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) - - @staticmethod - def _escape_interpolated_str(interp_s: str) -> str: - if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"): - # For heredoc strings, preserve their format exactly - return reverse_quotes_within_interpolation(interp_s) - # Escape backslashes first (very important to do this first) - escaped = interp_s.replace("\\", "\\\\") - # Escape quotes - escaped = escaped.replace('"', '\\"') - # Escape control characters - escaped = escaped.replace("\n", "\\n") - escaped = escaped.replace("\r", "\\r") - escaped = escaped.replace("\t", "\\t") - escaped = escaped.replace("\b", "\\b") - escaped = escaped.replace("\f", "\\f") - # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(f"{escaped}") - return interp_s - - @staticmethod - def _block_has_label(block: dict) -> bool: - return len(block.keys()) == 1 - - def __init__(self): - pass - - def transform(self, hcl_dict: dict) -> Tree: - """Given a dict, return a Lark.Tree representing the HCL AST.""" - level = 0 - body = self._transform_dict_to_body(hcl_dict, level) - start = Tree(Token("RULE", "start"), [body]) - return start - - @staticmethod - def _is_string_wrapped_tf(interp_s: str) -> bool: - """ - Determines whether a string is a complex HCL data structure - wrapped in ${ interpolation } characters. 
- """ - if not interp_s.startswith("${") or not interp_s.endswith("}"): - return False + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True - nested_tokens = [] - for match in re.finditer(r"\$?\{|}", interp_s): - if match.group(0) in ["${", "{"]: - nested_tokens.append(match.group(0)) - elif match.group(0) == "}": - nested_tokens.pop() - - # if we exit ${ interpolation } before the end of the string, - # this interpolated string has string parts and can't represent - # a valid HCL expression on its own (without quotes) - if len(nested_tokens) == 0 and match.end() != len(interp_s): - return False + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True - return True - - @classmethod - def _unwrap_interpolation(cls, value: str) -> str: - if cls._is_string_wrapped_tf(value): - return value[2:-1] - return value - - def _newline(self, level: int, count: int = 1) -> Tree: - return Tree( - Token("RULE", "new_line_or_comment"), - [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], - ) - - def _build_string_rule(self, string: str, level: int = 0) -> Tree: - # grammar in hcl2.lark defines that a string is built of any number of string parts, - # each string part can be either interpolation expression, escaped interpolation string - # or regular string - # this method build hcl2 string rule based on arbitrary string, - # splitting such string into individual parts and building a lark tree out of them - # - result = [] + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(string) if part != ""] - # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] - # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] - - for part in parts: - if part.startswith("$${") and part.endswith("}"): - result.append(Token("ESCAPED_INTERPOLATION", part)) - - # unwrap interpolation expression and recurse into it - elif part.startswith("${") and part.endswith("}"): - part = part[2:-1] - if part.startswith('"') and part.endswith('"'): - part = part[1:-1] - part = self._transform_value_to_expr_term(part, level) - else: - part = Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", part)])], - ) - - result.append(Tree(Token("RULE", "interpolation"), [part])) - - else: - result.append(Token("STRING_CHARS", part)) - - result = [Tree(Token("RULE", "string_part"), [element]) for element in result] - return Tree(Token("RULE", "string"), result) - - def _is_block(self, value: Any) -> bool: - if isinstance(value, dict): - block_body = value - if START_LINE_KEY in block_body.keys() or END_LINE_KEY in block_body.keys(): + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - try: - # if block is labeled, actual body might be nested - # pylint: disable=W0612 - block_label, block_body = next(iter(value.items())) - except StopIteration: - # no more potential labels = nothing more to check + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): return False - return self._is_block(block_body) + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data - if isinstance(value, list): - if len(value) > 0: - return self._is_block(value[0]) + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + 
IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True return False - def _calculate_block_labels(self, block: dict) -> Tuple[List[str], dict]: - # if block doesn't have a label - if len(block.keys()) != 1: - return [], block - - # otherwise, find the label - curr_label = list(block)[0] - potential_body = block[curr_label] - - # __start_line__ and __end_line__ metadata are not labels - if ( - START_LINE_KEY in potential_body.keys() - or END_LINE_KEY in potential_body.keys() - ): - return [curr_label], potential_body - - # recurse and append the label - next_label, block_body = self._calculate_block_labels(potential_body) - return [curr_label] + next_label, block_body - - # pylint:disable=R0914 - def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: - # we add a newline at the top of a body within a block, not the root body - # >2 here is to ignore the __start_line__ and __end_line__ metadata - if level > 0 and len(hcl_dict) > 2: - children = [self._newline(level)] + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for 
child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + if len(token) != 0: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] else: - children = [] - - # iterate through each attribute or sub-block of this block - for key, value in hcl_dict.items(): - if key in [START_LINE_KEY, END_LINE_KEY]: - continue - - # construct the identifier, whether that be a block type name or an attribute key - identifier_name = self._name_to_identifier(key) - - # first, check whether the value is a "block" - if self._is_block(value): - for block_v in value: - block_labels, block_body_dict = self._calculate_block_labels( - block_v - ) - block_label_trees = [ - self._build_string_rule(block_label, level) - for block_label in block_labels - ] - block_body = self._transform_dict_to_body( - block_body_dict, level + 1 - ) - - # create our actual block to add to our own body - block = Tree( - Token("RULE", "block"), - [identifier_name] + block_label_trees + [block_body], - ) - children.append(block) - # add empty line after 
block - new_line = self._newline(level - 1) - # add empty line with indentation for next element in the block - new_line.children.append(self._newline(level).children[0]) - - children.append(new_line) - - # if the value isn't a block, it's an attribute - else: - expr_term = self._transform_value_to_expr_term(value, level) - attribute = Tree( - Token("RULE", "attribute"), - [identifier_name, Token("EQ", " ="), expr_term], - ) - children.append(attribute) - children.append(self._newline(level)) - - # since we're leaving a block body here, reduce the indentation of the - # final newline if it exists - if ( - len(children) > 0 - and isinstance(children[-1], Tree) - and children[-1].data.type == "RULE" - and children[-1].data.value == "new_line_or_comment" - ): - children[-1] = self._newline(level - 1) - - return Tree(Token("RULE", "body"), children) - - # pylint: disable=too-many-branches, too-many-return-statements too-many-statements - def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: - """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) - - Anything passed to this function is treated "naively". Any lists passed - are assumed to be tuples, and any dicts passed are assumed to be objects. - No more checks will be performed for either to see if they are "blocks" - as this check happens in `_transform_dict_to_body`. 
- """ - - # for lists, recursively turn the child elements into expr_terms and - # store within a tuple - if isinstance(value, list): - tuple_tree = Tree( - Token("RULE", "tuple"), - [ - self._transform_value_to_expr_term(tuple_v, level) - for tuple_v in value - ], - ) - return Tree(Token("RULE", "expr_term"), [tuple_tree]) - - if value is None: - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", "null")])], - ) - - # for dicts, recursively turn the child k/v pairs into object elements - # and store within an object - if isinstance(value, dict): - elements = [] - - # if the object has elements, put it on a newline - if len(value) > 0: - elements.append(self._newline(level + 1)) - - # iterate through the items and add them to the object - for i, (k, dict_v) in enumerate(value.items()): - if k in [START_LINE_KEY, END_LINE_KEY]: - continue - - value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1) - k = self._unwrap_interpolation(k) - elements.append( - Tree( - Token("RULE", "object_elem"), - [ - Tree( - Token("RULE", "object_elem_key"), - [Tree(Token("RULE", "identifier"), [Token("NAME", k)])], - ), - Token("EQ", " ="), - value_expr_term, - ], - ) - ) - - # add indentation appropriately - if i < len(value) - 1: - elements.append(self._newline(level + 1)) - else: - elements.append(self._newline(level)) - return Tree( - Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elements)] - ) - - # treat booleans appropriately - if isinstance(value, bool): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "identifier"), - [Token("NAME", "true" if value else "false")], - ) - ], - ) - - # store integers as literals, digit by digit - if isinstance(value, int): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "int_lit"), - [Token("DECIMAL", digit) for digit in str(value)], - ) - ], - ) - - if isinstance(value, float): - value = str(value) - literal = [] - - if 
value[0] == "-": - # pop two first chars - minus and a digit - literal.append(Token("NEGATIVE_DECIMAL", value[:2])) - value = value[2:] - - while value != "": - char = value[0] - - if char == ".": - # current char marks beginning of decimal part: pop all remaining chars and end the loop - literal.append(Token("DOT", char)) - literal.extend(Token("DECIMAL", char) for char in value[1:]) - break - - if char == "e": - # current char marks beginning of e-notation: pop all remaining chars and end the loop - literal.append(Token("EXP_MARK", value)) - break - - literal.append(Token("DECIMAL", char)) - value = value[1:] - - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "float_lit"), literal)], - ) - - # store strings as single literals - if isinstance(value, str): - # potentially unpack a complex syntax structure - if self._is_string_wrapped_tf(value): - # we have to unpack it by parsing it - wrapped_value = re.match(r"\$\{(.*)}", value).group(1) # type:ignore - ast = reconstruction_parser().parse(f"value = {wrapped_value}") - - if ast.data != Token("RULE", "start"): - raise RuntimeError("Token must be `start` RULE") - - body = ast.children[0] - if body.data != Token("RULE", "body"): - raise RuntimeError("Token must be `body` RULE") - - attribute = body.children[0] - if attribute.data != Token("RULE", "attribute"): - raise RuntimeError("Token must be `attribute` RULE") - - if attribute.children[1] != Token("EQ", " ="): - raise RuntimeError("Token must be `EQ (=)` rule") - - parsed_value = attribute.children[2] - return parsed_value - - # otherwise it's a string - return Tree( - Token("RULE", "expr_term"), - [self._build_string_rule(self._escape_interpolated_str(value), level)], - ) - - # otherwise, we don't know the type - raise RuntimeError(f"Unknown type to transform {type(value)}") + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into 
a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark deleted file mode 100644 index 63154efb..00000000 --- a/hcl2/rule_transformer/hcl2.lark +++ /dev/null @@ -1,207 +0,0 @@ -// ============================================================================ -// Terminals -// ============================================================================ - -// Whitespace and Comments -NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ - -// Keywords -IF : "if" -IN : "in" -FOR : "for" -FOR_EACH : "for_each" - - -// Literals -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ -DECIMAL : "0".."9" -NEGATIVE_DECIMAL : "-" DECIMAL -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ -FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) - -// Operators -DOUBLE_EQ : "==" -NEQ : "!=" -LT : "<" -GT : ">" -LEQ : "<=" -GEQ : ">=" -MINUS : "-" -ASTERISK : "*" -SLASH : "/" -PERCENT : "%" -DOUBLE_AMP : "&&" -DOUBLE_PIPE : "||" -PLUS : "+" -NOT : "!" -QMARK : "?" - -// Punctuation -LPAR : "(" -RPAR : ")" -LBRACE : "{" -RBRACE : "}" -LSQB : "[" -RSQB : "]" -COMMA : "," -DOT : "." -EQ : /[ \t]*=(?!=|>)/ -COLON : ":" -DBLQUOTE : "\"" - -// Interpolation -INTERP_START : "${" - -// Splat Operators -ATTR_SPLAT : ".*" -FULL_SPLAT_START : "[*]" - -// Special Operators -FOR_OBJECT_ARROW : "=>" -ELLIPSIS : "..." 
-COLONS: "::" - -// Heredocs -HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ - -// Ignore whitespace (but not newlines, as they're significant in HCL) -%ignore /[ \t]+/ - -// ============================================================================ -// Rules -// ============================================================================ - -// Top-level structure -start : body - -// Body and basic constructs -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE - -// Whitespace and comments -new_line_or_comment: ( NL_OR_COMMENT )+ - -// Basic literals and identifiers -identifier : NAME -keyword: IN | FOR | IF | FOR_EACH -int_lit: INT_LITERAL -float_lit: FLOAT_LITERAL -string: DBLQUOTE string_part* DBLQUOTE -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation - -// Expressions -?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional - | or_expr -interpolation: INTERP_START expression RBRACE - -// Operator precedence ladder (lowest to highest) -// Each level uses left recursion for left-associativity. -// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain -// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. - -// Logical OR -?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op - | and_expr -or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term -!or_binary_operator : DOUBLE_PIPE -> binary_operator - -// Logical AND -?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op - | eq_expr -and_binary_term : and_binary_operator new_line_or_comment? 
eq_expr -> binary_term -!and_binary_operator : DOUBLE_AMP -> binary_operator - -// Equality -?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op - | rel_expr -eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term -!eq_binary_operator : DOUBLE_EQ -> binary_operator - | NEQ -> binary_operator - -// Relational -?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op - | add_expr -rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term -!rel_binary_operator : LT -> binary_operator - | GT -> binary_operator - | LEQ -> binary_operator - | GEQ -> binary_operator - -// Additive -?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op - | mul_expr -add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term -!add_binary_operator : PLUS -> binary_operator - | MINUS -> binary_operator - -// Multiplicative -?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op - | unary_expr -mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term -!mul_binary_operator : ASTERISK -> binary_operator - | SLASH -> binary_operator - | PERCENT -> binary_operator - -// Unary (highest precedence for operations) -?unary_expr : unary_op | expr_term -!unary_op : (MINUS | NOT) expr_term - -// Expression terms -expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR - | float_lit - | int_lit - | string - | tuple - | object - | identifier - | function_call - | heredoc_template - | heredoc_template_trim - | index_expr_term - | get_attr_expr_term - | attr_splat_expr_term - | full_splat_expr_term - | for_tuple_expr - | for_object_expr - -// Collections -tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB -object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE -object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression -object_elem_key_expression : LPAR expression RPAR -object_elem_key_dot_accessor : identifier (DOT identifier)+ - -// Heredocs -heredoc_template : HEREDOC_TEMPLATE -heredoc_template_trim : HEREDOC_TEMPLATE_TRIM - -// Functions -function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) - -// Indexing and attribute access -index_expr_term : expr_term index -get_attr_expr_term : expr_term get_attr -attr_splat_expr_term : expr_term attr_splat -full_splat_expr_term : expr_term full_splat -?index : braces_index | short_index -braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB -short_index : DOT INT_LITERAL -get_attr : DOT identifier -attr_splat : ATTR_SPLAT (get_attr | index)* -full_splat : FULL_SPLAT_START (get_attr | index)* - -// For expressions -!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB -!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE -!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? -!for_cond : IF new_line_or_comment? 
expression diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py deleted file mode 100644 index 647b6683..00000000 --- a/hcl2/rule_transformer/json.py +++ /dev/null @@ -1,12 +0,0 @@ -from json import JSONEncoder -from typing import Any - -from hcl2.rule_transformer.rules.abstract import LarkRule - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py deleted file mode 100644 index 099beead..00000000 --- a/hcl2/rule_transformer/reconstructor.py +++ /dev/null @@ -1,238 +0,0 @@ -from typing import List, Union - -from lark import Tree, Token -from hcl2.rule_transformer.rules import tokens -from hcl2.rule_transformer.rules.base import BlockRule -from hcl2.rule_transformer.rules.for_expressions import ForIntroRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ( - ExprTermRule, - ConditionalRule, - UnaryOpRule, -) - - -class HCLReconstructor: - """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - - _binary_op_types = { - "DOUBLE_EQ", - "NEQ", - "LT", - "GT", - "LEQ", - "GEQ", - "MINUS", - "ASTERISK", - "SLASH", - "PERCENT", - "DOUBLE_AMP", - "DOUBLE_PIPE", - "PLUS", - } - - def __init__(self): - self._reset_state() - - def _reset_state(self): - """State tracking for formatting decisions""" - self._last_was_space = True - self._current_indent = 0 - self._last_token_name = None - self._last_rule_name = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False - - def _should_add_space_before( - self, current_node: Union[Tree, Token], parent_rule_name: str = None - ) -> bool: - """Determine if we should add a space before the current token/rule.""" 
- - # Don't add space if we already have one - if self._last_was_space: - return False - - # Don't add space at the beginning - if self._last_token_name is None: - return False - - if isinstance(current_node, Token): - token_type = current_node.type - - # Space before '{' in blocks - if ( - token_type == tokens.LBRACE.lark_name() - and parent_rule_name == BlockRule.lark_name() - ): - return True - - # Space around Conditional Expression operators - if ( - parent_rule_name == ConditionalRule.lark_name() - and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - or self._last_token_name - in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - ): - return True - - # Space after - if ( - parent_rule_name == ForIntroRule.lark_name() - and token_type == tokens.COLON.lark_name() - ): - - return True - - # Space after commas in tuples and function arguments... - if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): - return False - return True - - if token_type in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - tokens.ELLIPSIS.lark_name(), - ]: - return True - - if ( - self._last_token_name - in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - ] - and token_type != "NL_OR_COMMENT" - ): - return True - - # Space around for_object arrow - if tokens.FOR_OBJECT_ARROW.lark_name() in [ - token_type, - self._last_token_name, - ]: - return True - - # Space after ellipsis in function arguments - if self._last_token_name == tokens.ELLIPSIS.lark_name(): - return True - - if tokens.EQ.lark_name() in [token_type, self._last_token_name]: - return True - - # Don't add space around operator tokens inside unary_op - if parent_rule_name == UnaryOpRule.lark_name(): - return False - - if ( - token_type in self._binary_op_types - or self._last_token_name in self._binary_op_types - ): - return True - - elif isinstance(current_node, Tree): - 
rule_name = current_node.data - - if parent_rule_name == BlockRule.lark_name(): - # Add space between multiple string/identifier labels in blocks - if rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ] and self._last_rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ]: - return True - - return False - - def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: - """Recursively reconstruct a Tree node into HCL text fragments.""" - result = [] - rule_name = tree.data - - if rule_name == UnaryOpRule.lark_name(): - for i, child in enumerate(tree.children): - result.extend(self._reconstruct_node(child, rule_name)) - if i == 0: - # Suppress space between unary operator and its operand - self._last_was_space = True - - elif rule_name == ExprTermRule.lark_name(): - # Check if parenthesized - if ( - len(tree.children) >= 3 - and isinstance(tree.children[0], Token) - and tree.children[0].type == tokens.LPAR.lark_name() - and isinstance(tree.children[-1], Token) - and tree.children[-1].type == tokens.RPAR.lark_name() - ): - self._in_parentheses = True - - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - self._in_parentheses = False - - else: - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - if self._should_add_space_before(tree, parent_rule_name): - result.insert(0, " ") - - # Update state tracking - self._last_rule_name = rule_name - if result: - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") - - return result - - def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: - """Reconstruct a Token node into HCL text fragments.""" - result = str(token.value) - if self._should_add_space_before(token, parent_rule_name): - result = " " + result - - self._last_token_name = token.type - if len(token) != 0: - self._last_was_space = result[-1].endswith(" ") or 
result[-1].endswith("\n") - - return result - - def _reconstruct_node( - self, node: Union[Tree, Token], parent_rule_name: str = None - ) -> List[str]: - """Reconstruct any node (Tree or Token) into HCL text fragments.""" - if isinstance(node, Tree): - return self._reconstruct_tree(node, parent_rule_name) - elif isinstance(node, Token): - return [self._reconstruct_token(node, parent_rule_name)] - else: - # Fallback: convert to string - return [str(node)] - - def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: - """Convert a Lark.Tree AST back into a string representation of HCL.""" - # Reset state - self._reset_state() - - # Reconstruct the tree - fragments = self._reconstruct_node(tree) - - # Join fragments and apply post-processing - result = "".join(fragments) - - if postproc: - result = postproc(result) - - # Ensure file ends with newline - if result and not result.endswith("\n"): - result += "\n" - - return result diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rules/__init__.py similarity index 100% rename from hcl2/rule_transformer/__init__.py rename to hcl2/rules/__init__.py diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rules/abstract.py similarity index 97% rename from hcl2/rule_transformer/rules/abstract.py rename to hcl2/rules/abstract.py index e83fed2b..a494d901 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -5,7 +5,7 @@ from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.utils import SerializationOptions, SerializationContext class LarkElement(ABC): diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rules/base.py similarity index 88% rename from hcl2/rule_transformer/rules/base.py rename to hcl2/rules/base.py index 
c879b772..a025949a 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rules/base.py @@ -4,14 +4,14 @@ from lark.tree import Meta from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE - -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE + +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rules/containers.py similarity index 93% rename from hcl2/rule_transformer/rules/containers.py rename to hcl2/rules/containers.py index a2f53436..4d7310c8 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rules/containers.py @@ -1,14 +1,14 @@ from typing import Tuple, List, Optional, Union, Any -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, ) -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.strings import StringRule 
+from hcl2.rules.tokens import ( COLON, EQ, LBRACE, @@ -20,11 +20,11 @@ RPAR, DOT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rules/expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/expressions.py rename to hcl2/rules/expressions.py index db256e82..1e1d0cd8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -4,16 +4,16 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import ( +from hcl2.rules.abstract import ( LarkToken, ) -from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.literal_rules import BinaryOperatorRule +from hcl2.rules.tokens import LPAR, RPAR, QMARK, COLON +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( wrap_into_parentheses, to_dollar_string, SerializationOptions, diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rules/for_expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/for_expressions.py rename to hcl2/rules/for_expressions.py index 3a89aba3..a1f24dcb 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -2,10 +2,10 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract 
import LarkRule, LarkElement +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( LSQB, RSQB, LBRACE, @@ -18,11 +18,11 @@ ELLIPSIS, FOR_OBJECT_ARROW, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rules/functions.py similarity index 90% rename from hcl2/rule_transformer/rules/functions.py rename to hcl2/rules/functions.py index 92cc8b11..380b959b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rules/functions.py @@ -1,14 +1,14 @@ from functools import lru_cache from typing import Any, Optional, Tuple, Union, List -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rules/indexing.py similarity index 94% rename from hcl2/rule_transformer/rules/indexing.py rename to hcl2/rules/indexing.py index 20decf00..fc8cbf90 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -2,21 +2,21 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule -from 
hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( DOT, IntLiteral, LSQB, RSQB, ATTR_SPLAT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, to_dollar_string, SerializationContext, diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rules/literal_rules.py similarity index 85% rename from hcl2/rule_transformer/rules/literal_rules.py rename to hcl2/rules/literal_rules.py index baf8546f..2e5b8281 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -1,8 +1,8 @@ from abc import ABC from typing import Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rules/strings.py similarity index 94% rename from hcl2/rule_transformer/rules/strings.py rename to hcl2/rules/strings.py index 4e28e976..248ab173 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rules/strings.py @@ -1,9 +1,9 @@ import sys from typing import Tuple, List, Any, Union -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions 
import ExpressionRule +from hcl2.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, @@ -12,7 +12,7 @@ HEREDOC_TEMPLATE, HEREDOC_TRIM_TEMPLATE, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rules/tokens.py similarity index 98% rename from hcl2/rule_transformer/rules/tokens.py rename to hcl2/rules/tokens.py index ba948d3e..b02be66e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,7 +1,7 @@ from functools import lru_cache from typing import Callable, Any, Type, Optional, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken +from hcl2.rules.abstract import LarkToken class StringToken(LarkToken): diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rules/tree.py similarity index 100% rename from hcl2/rule_transformer/rules/tree.py rename to hcl2/rules/tree.py diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rules/whitespace.py similarity index 90% rename from hcl2/rule_transformer/rules/whitespace.py rename to hcl2/rules/whitespace.py index 62069b78..5f2fa886 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -1,9 +1,9 @@ from abc import ABC from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule -from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkToken, LarkRule +from hcl2.rules.literal_rules import TokenRule +from hcl2.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/transformer.py similarity index 93% rename from hcl2/rule_transformer/transformer.py rename to hcl2/transformer.py index 931eab8e..07230fe5 100644 --- 
a/hcl2/rule_transformer/transformer.py +++ b/hcl2/transformer.py @@ -2,13 +2,13 @@ from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( ObjectRule, ObjectElemRule, ObjectElemKeyRule, @@ -16,21 +16,21 @@ ObjectElemKeyExpressionRule, ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expressions import ( +from hcl2.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ForIntroRule, ForCondRule, ) -from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule -from hcl2.rule_transformer.rules.indexing import ( +from hcl2.rules.functions import ArgumentsRule, FunctionCallRule +from hcl2.rules.indexing import ( IndexExprTermRule, SqbIndexRule, ShortIndexRule, @@ -41,28 +41,28 @@ FullSplatRule, FullSplatExprTermRule, ) -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, BinaryOperatorRule, KeywordRule, ) -from hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( InterpolationRule, StringRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, IntLiteral, FloatLiteral, StringToken, StaticStringToken, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.whitespace import NewLineOrCommentRule class RuleTransformer(Transformer): diff --git a/hcl2/rule_transformer/utils.py b/hcl2/utils.py similarity index 100% rename from hcl2/rule_transformer/utils.py rename to 
hcl2/utils.py From ba80334cd0ab6c567f425cd3813e5ed98132880c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:51:26 +0100 Subject: [PATCH 14/24] minor improvements to deserializer.py and formatter.py --- hcl2/deserializer.py | 43 +++++++++++++----------------- hcl2/formatter.py | 63 ++++++++++++++------------------------------ 2 files changed, 39 insertions(+), 67 deletions(-) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 2290809c..d6b4d4c2 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,8 +1,8 @@ import json from abc import ABC, abstractmethod from dataclasses import dataclass -from functools import lru_cache -from typing import Any, TextIO, List, Union, Optional +from functools import cached_property +from typing import Any, TextIO, List, Union from regex import regex @@ -55,7 +55,6 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.transformer import RuleTransformer from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @@ -83,11 +82,8 @@ def load(self, file: TextIO) -> LarkElement: class BaseDeserializer(LarkElementTreeDeserializer): def __init__(self, options=None): super().__init__(options) - self._current_line = 1 - self._last_new_line: Optional[NewLineOrCommentRule] = None - @property - @lru_cache + @cached_property def _transformer(self) -> RuleTransformer: return RuleTransformer() @@ -119,27 +115,29 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): - if self._is_block(value): + for key, val in value.items(): + if self._is_block(val): # this value is a list of blocks, iterate over each block and deserialize them - for block in value: + for block in val: children.append(self._deserialize_block(key, block)) else: # otherwise it's just an attribute if key != IS_BLOCK: - children.append(self._deserialize_attribute(key, value)) + 
children.append(self._deserialize_attribute(key, val)) return children def _deserialize_text(self, value: Any) -> LarkRule: - try: - int_val = int(value) - if "." in str(value): - return FloatLitRule([FloatLiteral(float(value))]) - return IntLitRule([IntLiteral(int_val)]) - except ValueError: - pass + # bool must be checked before int since bool is a subclass of int + if isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + if isinstance(value, float): + return FloatLitRule([FloatLiteral(value)]) + + if isinstance(value, int): + return IntLitRule([IntLiteral(value)]) if isinstance(value, str): if value.startswith('"') and value.endswith('"'): @@ -160,9 +158,6 @@ def _deserialize_text(self, value: Any) -> LarkRule: return self._deserialize_identifier(value) - elif isinstance(value, bool): - return self._deserialize_identifier(str(value).lower()) - return self._deserialize_identifier(str(value)) def _deserialize_identifier(self, value: str) -> IdentifierRule: @@ -283,8 +278,8 @@ def _deserialize_list(self, value: List) -> TupleRule: def _deserialize_object(self, value: dict) -> ObjectRule: children = [] - for key, value in value.items(): - children.append(self._deserialize_object_elem(key, value)) + for key, val in value.items(): + children.append(self._deserialize_object_elem(key, val)) if self.options.object_elements_trailing_comma: children.append(COMMA()) @@ -342,6 +337,6 @@ def _contains_block_marker(self, obj: dict) -> bool: return True if isinstance(value, list): for element in value: - if self._contains_block_marker(element): + if isinstance(element, dict) and self._contains_block_marker(element): return True return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 205d2ddd..35fb6b05 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -42,8 +42,7 @@ def format_tree(self, tree: LarkElement): class BaseFormatter(LarkElementTreeFormatter): def __init__(self, options: FormatterOptions = None): 
super().__init__(options) - self._current_line = 1 - self._current_indent_level = 0 + self._last_new_line: NewLineOrCommentRule = None def format_tree(self, tree: LarkElement): if isinstance(tree, StartRule): @@ -51,9 +50,6 @@ def format_tree(self, tree: LarkElement): def format_start_rule(self, rule: StartRule): self.format_body_rule(rule.body, 0) - # for child in rule.body.children: - # if isinstance(child, BlockRule): - # self.format_block_rule(child, 1) def format_block_rule(self, rule: BlockRule, indent_level: int = 0): if self.options.vertically_align_attributes: @@ -87,7 +83,8 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): new_children.insert(-2, self._build_newline(indent_level)) new_children.append(self._build_newline(indent_level, 2)) - new_children.pop(-1) + if new_children: + new_children.pop(-1) rule._children = new_children def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): @@ -158,7 +155,7 @@ def format_expression(self, rule: ExprTermRule, indent_level: int = 0): self.format_forobjectexpr(rule.expression, indent_level) elif isinstance(rule.expression, ExprTermRule): - self.format_expression(rule.expression) + self.format_expression(rule.expression, indent_level) def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): for child in expression.children: @@ -169,7 +166,6 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = for index in indexes: expression.children[index] = self._build_newline(indent_level) self._deindent_last_line() - # expression.children[8] = self._build_newline(indent_level - 1) def format_forobjectexpr( self, expression: ForObjectExprRule, indent_level: int = 0 @@ -192,23 +188,28 @@ def _vertically_align_attributes_in_body(self, body: BodyRule): attributes_sequence.append(child) elif attributes_sequence: - max_length = max( - len(attribute.identifier.token.value) - for attribute in attributes_sequence - ) - for attribute in 
attributes_sequence: - name_length = len(attribute.identifier.token.value) - spaces_to_add = max_length - name_length - attribute.children[1].set_value( - " " * spaces_to_add + attribute.children[1].value - ) + self._align_attributes_sequence(attributes_sequence) attributes_sequence = [] + if attributes_sequence: + self._align_attributes_sequence(attributes_sequence) + + def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(len(elem.key.serialize()) for elem in rule.elements) for elem in rule.elements: key_length = len(elem.key.serialize()) - print(elem.key.serialize(), key_length) spaces_to_add = max_length - key_length @@ -218,17 +219,6 @@ def _vertically_align_object_elems(self, rule: ObjectRule): elem.children[1].set_value(" " * spaces_to_add + separator.value) - def _move_to_next_line(self, times: int = 1): - self._current_line += times - - def _increase_indent_level(self, times: int = 1): - self._current_indent_level += times - - def _decrease_indent_level(self, times: int = 1): - self._current_indent_level -= times - if self._current_indent_level < 0: - self._current_indent_level = 0 - def _build_newline( self, next_line_indent: int = 0, count: int = 1 ) -> NewLineOrCommentRule: @@ -247,16 +237,3 @@ def _deindent_last_line(self, times: int = 1): for i in range(times): if token.value.endswith(" " * self.options.indent_length): token.set_value(token.value[: -self.options.indent_length]) - - # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: - # result = Meta() - # result.empty = length == 0 - # result.line = 
self._current_line - # result.column = indent_level * self.options.indent_length - # # result.start_pos = - # # result.end_line = - # # result.end_column = - # # result.end_pos = - # # result.orig_expansion = - # # result.match_tree = - # return result From e32d3e3028b3f808c9c3f865135bffe25aaa1b5c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 17:50:06 +0100 Subject: [PATCH 15/24] add round-trip test suite --- test/round_trip/__init__.py | 0 .../hcl2_original/operator_precedence.tf | 15 ++ test/round_trip/hcl2_original/smoke.tf | 72 ++++++ .../hcl2_reconstructed/operator_precedence.tf | 15 ++ test/round_trip/hcl2_reconstructed/smoke.tf | 64 +++++ .../operator_precedence.json | 20 ++ test/round_trip/json_reserialized/smoke.json | 70 ++++++ .../json_serialized/operator_precedence.json | 20 ++ test/round_trip/json_serialized/smoke.json | 70 ++++++ .../special/operator_precedence.json | 20 ++ test/round_trip/test_round_trip.py | 224 ++++++++++++++++++ test/unit/__init__.py | 1 - test/unit/test_builder.py | 110 --------- test/unit/test_dict_transformer.py | 32 --- test/unit/test_hcl2_syntax.py | 193 --------------- test/unit/test_load.py | 57 ----- test/unit/test_load_with_meta.py | 23 -- test/unit/test_reconstruct_ast.py | 112 --------- test/unit/test_reconstruct_dict.py | 88 ------- 19 files changed, 590 insertions(+), 616 deletions(-) create mode 100644 test/round_trip/__init__.py create mode 100644 test/round_trip/hcl2_original/operator_precedence.tf create mode 100644 test/round_trip/hcl2_original/smoke.tf create mode 100644 test/round_trip/hcl2_reconstructed/operator_precedence.tf create mode 100644 test/round_trip/hcl2_reconstructed/smoke.tf create mode 100644 test/round_trip/json_reserialized/operator_precedence.json create mode 100644 test/round_trip/json_reserialized/smoke.json create mode 100644 test/round_trip/json_serialized/operator_precedence.json create mode 100644 test/round_trip/json_serialized/smoke.json create mode 100644 
test/round_trip/special/operator_precedence.json create mode 100644 test/round_trip/test_round_trip.py delete mode 100644 test/unit/__init__.py delete mode 100644 test/unit/test_builder.py delete mode 100644 test/unit/test_dict_transformer.py delete mode 100644 test/unit/test_hcl2_syntax.py delete mode 100644 test/unit/test_load.py delete mode 100644 test/unit/test_load_with_meta.py delete mode 100644 test/unit/test_reconstruct_ast.py delete mode 100644 test/unit/test_reconstruct_dict.py diff --git a/test/round_trip/__init__.py b/test/round_trip/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/round_trip/hcl2_original/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/round_trip/hcl2_original/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/round_trip/hcl2_original/smoke.tf new file mode 100644 index 00000000..d741a6ac --- /dev/null +++ b/test/round_trip/hcl2_original/smoke.tf @@ -0,0 +1,72 @@ + +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (- a) + e = ( + a == b + ? true : false + ) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10 + , + "k3" = {k4 = "a"} + (5 + 5) = "d" + k5.attr.attr = "e" + } + i = [ + a, b + , + "c${aaa}", + d, + [1, 2, 3,], + f(a), + provider::func::aa(5) + + ] + j = func( + a, b + , c, + d ... 
+ + ) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[ + forwarding_rule_key + ].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + ] + } + ... + if + substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/round_trip/hcl2_reconstructed/operator_precedence.tf new file mode 100644 index 00000000..323759aa --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? 
d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf new file mode 100644 index 00000000..b5c54e96 --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -0,0 +1,64 @@ +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (-a) + e = (a == b ? true : false) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10, + "k3" = { + k4 = "a", + }, + (5 + 5) = "d", + k5.attr.attr = "e", + } + i = [ + a, + b, + "c${aaa}", + d, + [ + 1, + 2, + 3, + ], + f(a), + provider::func::aa(), + ] + j = func(a, b, c, d) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + + ] + } ... 
if substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/round_trip/json_reserialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_reserialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_reserialized/smoke.json b/test/round_trip/json_reserialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_reserialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... 
if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/round_trip/json_serialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_serialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/smoke.json b/test/round_trip/json_serialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_serialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/special/operator_precedence.json b/test/round_trip/special/operator_precedence.json new file mode 100644 index 00000000..35adb5bb --- /dev/null +++ b/test/round_trip/special/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${(var.env == \"prod\") && var.debug}", + "and_before_ternary": "${(true && true) ? 
1 : 0}", + "mixed_arith_cmp": "${(var.a + (var.b * var.c)) > 10}", + "full_chain": "${(((a + b) == c) && d) || e}", + "left_assoc_sub": "${(a - b) - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${(!a) && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py new file mode 100644 index 00000000..b43340b6 --- /dev/null +++ b/test/round_trip/test_round_trip.py @@ -0,0 +1,224 @@ +"""Round-trip tests for the HCL2 → JSON → HCL2 pipeline. + +Every test starts from the source HCL files in test/round_trip/hcl2/ and +runs the pipeline forward from there, comparing actuals against expected +outputs at each stage: + +1. HCL → JSON serialization (parse + transform + serialize) +2. JSON → JSON reserialization (serialize + deserialize + reserialize) +3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) +4. 
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) +""" + +import json +from enum import Enum +from pathlib import Path +from typing import List +from unittest import TestCase + +from hcl2 import parses +from hcl2.deserializer import BaseDeserializer +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + +ROUND_TRIP_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" + +_STEP_DIRS = { + "hcl2_original": HCL2_ORIGINAL_DIR, + "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", + "json_serialized": ROUND_TRIP_DIR / "json_serialized", + "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", +} + +_STEP_SUFFIXES = { + "hcl2_original": ".tf", + "hcl2_reconstructed": ".tf", + "json_serialized": ".json", + "json_reserialized": ".json", + "json_operator_precedence": ".json", +} + + +class SuiteStep(Enum): + ORIGINAL = "hcl2_original" + RECONSTRUCTED = "hcl2_reconstructed" + JSON_SERIALIZED = "json_serialized" + JSON_RESERIALIZED = "json_reserialized" + JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" + + +def _get_suites() -> List[str]: + """ + Get a list of the test suites. + Names of a test suite is a name of file in `test/round_trip/hcl2_original/` without the .tf suffix. + + Override SUITES to run a specific subset, e.g. SUITES = ["config"] + """ + return SUITES or sorted( + file.stem for file in HCL2_ORIGINAL_DIR.iterdir() if file.is_file() + ) + + +# set this to arbitrary list of test suites to run, +# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/round_trip/hcl2_original/smoke.tf` +SUITES: List[str] = [] + + +def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: + """Return the path for a given suite name and pipeline step.""" + return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) + + +def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: + """Parse HCL text and serialize to a Python dict.""" + parsed_tree = parses(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + if options: + return rules.serialize(options=options) + return rules.serialize() + + +def _deserialize_and_reserialize(serialized: dict) -> dict: + """Deserialize a Python dict back through the rule tree and reserialize.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + return deserialized.serialize() + + +def _deserialize_and_reconstruct(serialized: dict) -> str: + """Deserialize a Python dict and reconstruct HCL text.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestRoundTripSerialization(TestCase): + """Test HCL2 → JSON serialization: parse HCL, transform, serialize, compare with expected JSON.""" + + maxDiff = None + + def test_hcl_to_json(self): + for suite in _get_suites(): + yield self.check_hcl_to_json, suite + + def check_hcl_to_json(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) + + self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for 
{suite}", + ) + + +class TestRoundTripReserialization(TestCase): + """Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + + maxDiff = None + + def test_json_reserialization(self): + for suite in _get_suites(): + yield self.check_json_reserialization, suite + + def check_json_reserialization(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) + + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) + + +class TestRoundTripReconstruction(TestCase): + """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + + maxDiff = None + + def test_json_to_hcl(self): + for suite in _get_suites(): + yield self.check_json_to_hcl, suite + + def check_json_to_hcl(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) + + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) + + +class TestRoundTripFull(TestCase): + """Test full round-trip: HCL → JSON → HCL → JSON should produce matching JSON.""" + + maxDiff = None + + def test_full_round_trip(self): + for suite in _get_suites(): + yield self.check_full_round_trip, suite + + def check_full_round_trip(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) + + # Reconstruct: JSON → 
HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) + + # Re-parse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) + + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) diff --git a/test/unit/__init__.py b/test/unit/__init__.py deleted file mode 100644 index c497b297..00000000 --- a/test/unit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests -- tests that verify the code of this egg in isolation""" diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py deleted file mode 100644 index 2ce0cfed..00000000 --- a/test/unit/test_builder.py +++ /dev/null @@ -1,110 +0,0 @@ -# pylint:disable=C0116 - -"""Test building an HCL file from scratch""" - -from pathlib import Path -from unittest import TestCase - -import hcl2 -import hcl2.builder - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestBuilder(TestCase): - """Test building a variety of hcl files""" - - # print any 
differences fully to the console - maxDiff = None - - def test_build_blocks_tf(self): - nested_builder = hcl2.Builder() - nested_builder.block("nested_block_1", ["a"], foo="bar") - nested_builder.block("nested_block_1", ["a", "b"], bar="foo") - nested_builder.block("nested_block_1", foobar="barfoo") - nested_builder.block("nested_block_2", barfoo="foobar") - - builder = hcl2.Builder() - builder.block("block", a=1) - builder.block("block", ["label"], __nested_builder__=nested_builder, b=2) - - self.compare_filenames(builder, "blocks.tf") - - def test_build_escapes_tf(self): - builder = hcl2.Builder() - - builder.block("block", ["block_with_newlines"], a="line1\nline2") - - self.compare_filenames(builder, "escapes.tf") - - def test_locals_embdedded_condition_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - terraform={ - "channels": "${(local.running_in_ci ? local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": None, - }, - ) - - self.compare_filenames(builder, "locals_embedded_condition.tf") - - def test_locals_embedded_function_tf(self): - builder = hcl2.Builder() - - function_test = ( - "${var.basename}-${var.forwarder_function_name}_" - '${md5("${var.vpc_id}${data.aws_region.current.name}")}' - ) - builder.block("locals", function_test=function_test) - - self.compare_filenames(builder, "locals_embedded_function.tf") - - def test_locals_embedded_interpolation_tf(self): - builder = hcl2.Builder() - - attributes = { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); " - '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', - "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}', - "escaped_interpolation": "prefix:$${aws:username}-suffix", - "simple_and_escaped": '${"bar"}$${baz:bat}', - "simple_and_escaped_reversed": '$${baz:bat}${"bar"}', - "nested_escaped": 'bar-${"$${baz:bat}"}', - } 
- - builder.block("locals", **attributes) - - self.compare_filenames(builder, "string_interpolations.tf") - - def test_provider_function_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - name2='${provider::test2::test("a")}', - name3='${test("a")}', - ) - - self.compare_filenames(builder, "provider_function.tf") - - def compare_filenames(self, builder: hcl2.Builder, filename: str): - hcl_dict = builder.build() - hcl_ast = hcl2.reverse_transform(hcl_dict) - hcl_content_built = hcl2.writes(hcl_ast) - - hcl_path = (HCL2_DIR / filename).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - self.assertMultiLineEqual( - hcl_content_built, - hcl_file_content, - f"file {filename} does not match its programmatically built version.", - ) diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py deleted file mode 100644 index baad5ba9..00000000 --- a/test/unit/test_dict_transformer.py +++ /dev/null @@ -1,32 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -from unittest import TestCase - -from hcl2.dict_transformer import DictTransformer - - -class TestDictTransformer(TestCase): - """Test behaviour of hcl2.transformer.DictTransformer class""" - - @staticmethod - def build_dict_transformer(with_meta: bool = False) -> DictTransformer: - return DictTransformer(with_meta) - - def test_to_string_dollar(self): - string_values = { - '"bool"': "bool", - '"number"': "number", - '"string"': "string", - "${value_1}": "${value_1}", - '"value_2': '${"value_2}', - 'value_3"': '${value_3"}', - '"value_4"': "value_4", - "value_5": "${value_5}", - } - - dict_transformer = self.build_dict_transformer() - - for value, expected in string_values.items(): - actual = dict_transformer.to_string_dollar(value) - - self.assertEqual(actual, expected) diff --git a/test/unit/test_hcl2_syntax.py b/test/unit/test_hcl2_syntax.py deleted file mode 100644 index 96113df3..00000000 --- a/test/unit/test_hcl2_syntax.py +++ 
/dev/null @@ -1,193 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -import string # pylint:disable=W4901 # https://stackoverflow.com/a/16651393 -from unittest import TestCase - -from test.helpers.hcl2_helper import Hcl2Helper - -from lark import UnexpectedToken, UnexpectedCharacters - - -class TestHcl2Syntax(Hcl2Helper, TestCase): - """Test parsing individual elements of HCL2 syntax""" - - def test_argument(self): - syntax = self.build_argument("identifier", '"expression"') - result = self.load_to_dict(syntax) - self.assertDictEqual(result, {"identifier": "expression"}) - - def test_identifier_starts_with_digit(self): - for i in range(0, 10): - argument = self.build_argument(f"{i}id") - with self.assertRaises(UnexpectedToken) as e: - self.load_to_dict(argument) - assert ( - f"Unexpected token Token('DECIMAL', '{i}') at line 1, column 1" - in str(e) - ) - - def test_identifier_starts_with_special_chars(self): - chars = string.punctuation.replace("_", "") - for i in chars: - argument = self.build_argument(f"{i}id") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier_contains_special_chars(self): - chars = string.punctuation.replace("_", "").replace("-", "") - for i in chars: - argument = self.build_argument(f"identifier{i}") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier(self): - argument = self.build_argument("_-__identifier_-1234567890-_") - self.load_to_dict(argument) - - def test_block_no_labels(self): - block = """ - block { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{}]}) - - def test_block_single_label(self): - block = """ - block "label" { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{"label": {}}]}) - - def test_block_multiple_labels(self): - block = """ - block "label1" "label2" "label3" { - } - """ - result = 
self.load_to_dict(block) - self.assertDictEqual( - result, {"block": [{"label1": {"label2": {"label3": {}}}}]} - ) - - def test_unary_operation(self): - operations = [ - ("identifier = -10", {"identifier": -10}), - ("identifier = !true", {"identifier": "${!true}"}), - ] - for hcl, dict_ in operations: - result = self.load_to_dict(hcl) - self.assertDictEqual(result, dict_) - - def test_tuple(self): - tuple_ = """tuple = [ - identifier, - "string", 100, - true == false, - 5 + 5, function(), - ]""" - result = self.load_to_dict(tuple_) - self.assertDictEqual( - result, - { - "tuple": [ - "${identifier}", - "string", - 100, - "${true == false}", - "${5 + 5}", - "${function()}", - ] - }, - ) - - def test_object(self): - object_ = """object = { - key1: identifier, key2: "string", key3: 100, - key4: true == false // comment - key5: 5 + 5, key6: function(), - key7: value == null ? 1 : 0 - }""" - result = self.load_to_dict(object_) - self.assertDictEqual( - result, - { - "object": { - "key1": "${identifier}", - "key2": "string", - "key3": 100, - "key4": "${true == false}", - "key5": "${5 + 5}", - "key6": "${function()}", - "key7": "${value == null ? 
1 : 0}", - } - }, - ) - - def test_function_call_and_arguments(self): - calls = { - "r = function()": {"r": "${function()}"}, - "r = function(arg1, arg2)": {"r": "${function(arg1, arg2)}"}, - """r = function( - arg1, arg2, - arg3, - ) - """: { - "r": "${function(arg1, arg2, arg3)}" - }, - } - - for call, expected in calls.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_index(self): - indexes = { - "r = identifier[10]": {"r": "${identifier[10]}"}, - "r = identifier.20": { - "r": "${identifier[2]}" - }, # TODO debug why `20` is parsed to `2` - """r = identifier["key"]""": {"r": '${identifier["key"]}'}, - """r = identifier.key""": {"r": "${identifier.key}"}, - } - for call, expected in indexes.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_e_notation(self): - literals = { - "var = 3e4": {"var": "${3e4}"}, - "var = 3.5e5": {"var": "${3.5e5}"}, - "var = -3e6": {"var": "${-3e6}"}, - "var = -2.3e4": {"var": "${-2.3e4}"}, - "var = -5e-2": {"var": "${-5e-2}"}, - "var = -6.1e-3": {"var": "${-6.1e-3}"}, - } - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) - - def test_null(self): - identifier = "var = null" - - expected = {"var": None} - - result = self.load_to_dict(identifier) - self.assertDictEqual(result, expected) - - def test_expr_term_parenthesis(self): - literals = { - "a = 1 * 2 + 3": {"a": "${1 * 2 + 3}"}, - "b = 1 * (2 + 3)": {"b": "${1 * (2 + 3)}"}, - "c = (1 * (2 + 3))": {"c": "${(1 * (2 + 3))}"}, - "conditional = value == null ? 1 : 0": { - "conditional": "${value == null ? 1 : 0}" - }, - "conditional = (value == null ? 1 : 0)": { - "conditional": "${(value == null ? 
1 : 0)}" - }, - } - - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) diff --git a/test/unit/test_load.py b/test/unit/test_load.py deleted file mode 100644 index f9be8845..00000000 --- a/test/unit/test_load.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Test parsing a variety of hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -from hcl2.parser import PARSER_FILE, parser -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestLoad(TestCase): - """Test parsing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_load_terraform(self): - """Test parsing a set of hcl2 files and force recreating the parser file""" - - # create a parser to make sure that the parser file is created - parser() - - # delete the parser file to force it to be recreated - PARSER_FILE.unlink() - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_load_terraform_from_cache(self): - """Test parsing a set of hcl2 files from a cached parser file""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """Loads a single hcl2 file, parses it and compares with the expected json""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - if not json_path.exists(): - assert ( - False - ), f"Expected json equivalent of the hcl file doesn't exist {json_path}" - - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict = hcl2.load(hcl_file) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - 
json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, json_dict, f"\n\nfailed comparing {hcl_path_str}" - ) diff --git a/test/unit/test_load_with_meta.py b/test/unit/test_load_with_meta.py deleted file mode 100644 index b081844e..00000000 --- a/test/unit/test_load_with_meta.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test parsing hcl files with meta parameters""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - -TEST_WITH_META_DIR = Path(__file__).absolute().parent.parent / "helpers" / "with-meta" -TF_FILE_PATH = TEST_WITH_META_DIR / "data_sources.tf" -JSON_FILE_PATH = TEST_WITH_META_DIR / "data_sources.json" - - -class TestLoadWithMeta(TestCase): - """Test parsing hcl files with meta parameters""" - - def test_load_terraform_meta(self): - """Test load() with with_meta flag set to true.""" - with TF_FILE_PATH.open("r") as tf_file, JSON_FILE_PATH.open("r") as json_file: - self.assertDictEqual( - json.load(json_file), - hcl2.load(tf_file, with_meta=True), - ) diff --git a/test/unit/test_reconstruct_ast.py b/test/unit/test_reconstruct_ast.py deleted file mode 100644 index b9545def..00000000 --- a/test/unit/test_reconstruct_ast.py +++ /dev/null @@ -1,112 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_write_terraform_exact(self): - """ 
- Test reconstructing a set of hcl2 files, to make sure they - reconstruct exactly the same, including whitespace. - """ - - # the reconstruction process is not precise, so some files do not - # reconstruct their whitespace exactly the same, but they are - # syntactically equivalent. This list is a target for further - # improvements to the whitespace handling of the reconstruction - # algorithm. - inexact_files = [ - # the reconstructor loses commas on the last element in an array, - # even if they're in the input file - "iam.tf", - "variables.tf", - # the reconstructor doesn't preserve indentation within comments - # perfectly - "multiline_expressions.tf", - # the reconstructor doesn't preserve the line that a ternary is - # broken on. - "route_table.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_whitespace, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - try: - hcl2_dict = hcl2.loads(hcl_reconstructed) - except Exception as exc: - assert ( - False - ), f"failed to tokenize terraform in file reconstructed from `{hcl_path_str}`: {exc}" - - json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, - json_dict, - f"failed comparing {hcl_path_str} with reconstructed version", - ) - - def check_whitespace(self, 
hcl_path_str: str): - """Tests that the reconstructed file matches the original file exactly.""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - self.assertMultiLineEqual( - hcl_reconstructed, - hcl_file_content, - f"file {hcl_path_str} does not match its reconstructed version \ - exactly. this is usually whitespace related.", - ) diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py deleted file mode 100644 index a65e8429..00000000 --- a/test/unit/test_reconstruct_dict.py +++ /dev/null @@ -1,88 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -import traceback -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - - # the reconstruction process is not precise, so some files do not - # reconstruct any embedded HCL expressions exactly the same. 
this - # list captures those, and should be manually inspected regularly to - # ensure that files remain syntactically equivalent - inexact_files = [ - # one level of interpolation is stripped from this file during - # reconstruction, since we don't have a way to distinguish it from - # a complex HCL expression. the output parses to the same value - # though - "multi_level_interpolation.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict_correct = hcl2.load(hcl_file) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'correct' terraform in " - f"`{hcl_path_str}`: {traceback.format_exc()}" - ) from exc - - json_dict = json.load(json_file) - - try: - hcl_ast = hcl2.reverse_transform(json_dict) - except Exception as exc: - raise RuntimeError( - f"failed to reverse transform HCL from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - raise RuntimeError( - f"failed to reconstruct terraform from AST from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'reconstructed' terraform from AST from " - f"`{json_path.name}`: {exc}, \n{hcl_reconstructed}" - ) from exc - - self.assertDictEqual( - hcl2_dict_reconstructed, - hcl2_dict_correct, - f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}", - ) From 
e32a5407f3cf4e0e052dfd10456a031ba7b4816c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:14:38 +0100 Subject: [PATCH 16/24] removed old unused file --- hcl2/rules/tree.py | 106 --------------------------------------------- 1 file changed, 106 deletions(-) delete mode 100644 hcl2/rules/tree.py diff --git a/hcl2/rules/tree.py b/hcl2/rules/tree.py deleted file mode 100644 index e39d2077..00000000 --- a/hcl2/rules/tree.py +++ /dev/null @@ -1,106 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Optional, Any, Union - - -class LarkNode(ABC): - """Base class for all nodes in the tree""" - - def __init__(self, index: int = -1, parent: Optional["Node"] = None): - self._index = index - self._parent = parent - - @property - def parent(self) -> Optional["Node"]: - return self._parent - - @property - def index(self) -> int: - return self._index - - def set_parent(self, parent: "Node"): - self._parent = parent - - def set_index(self, index: int): - self._index = index - - @abstractmethod - def serialize(self, options=None) -> Any: - pass - - @abstractmethod - def to_lark(self) -> Any: - """Convert back to Lark representation""" - pass - - def is_leaf(self) -> bool: - """Check if this is a leaf node (atomic token)""" - return isinstance(self, LeafNode) - - def is_sequence(self) -> bool: - """Check if this is a token sequence node""" - return isinstance(self, SequenceNode) - - def is_internal(self) -> bool: - """Check if this is an internal node (grammar rule)""" - return isinstance(self, InternalNode) - - def is_atomic(self) -> bool: - """Check if this represents an atomic value (leaf or sequence)""" - return self.is_leaf() or self.is_sequence() - - -class LarkLeaf(Node, ABC): - """""" - - def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): - super().__init__(index, parent) - self._value = value - - @property - def value(self) -> Any: - return self._value - - def serialize(self, options=None) -> Any: - 
return self._value - - -class InternalNode(Node): - def __init__( - self, children: List[Node], index: int = -1, parent: Optional[Node] = None - ): - super().__init__(index, parent) - self._children = children or [] - - # Set parent and index for all children - for i, child in enumerate(self._children): - if child is not None: - child.set_parent(self) - child.set_index(i) - - @property - def children(self) -> List[Node]: - return self._children - - def add_child(self, child: Node): - """Add a child to this internal node""" - child.set_parent(self) - child.set_index(len(self._children)) - self._children.append(child) - - def remove_child(self, index: int) -> Optional[Node]: - """Remove child at given index""" - if 0 <= index < len(self._children): - child = self._children.pop(index) - if child: - child.set_parent(None) - # Update indices for remaining children - for i in range(index, len(self._children)): - if self._children[i]: - self._children[i].set_index(i) - return child - return None - - @abstractmethod - def rule_name(self) -> str: - """The name of the grammar rule this represents""" - pass From 210e3cd2c354670b7ee3e0fde217d1862d4d26ba Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:15:24 +0100 Subject: [PATCH 17/24] fix - dont add spaces add the end of the line (before newline rule); remove unused import --- hcl2/reconstructor.py | 4 ++-- hcl2/rules/abstract.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index e92f7040..1b5260ac 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -87,8 +87,8 @@ def _should_add_space_before( # Space after commas in tuples and function arguments... if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): + # ... 
except before closing brackets or newlines + if token_type in (tokens.RSQB.lark_name(), "NL_OR_COMMENT"): return False return True diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index a494d901..316c777a 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional, Tuple, Callable +from typing import Any, Union, List, Optional, Callable from lark import Token, Tree -from lark.exceptions import VisitError from lark.tree import Meta from hcl2.utils import SerializationOptions, SerializationContext From b235ec9845b3caea2b6218f8f29608567c42a240 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:27:24 +0100 Subject: [PATCH 18/24] use unittest subTest to fix noise in test results ("The type of the None singleton"); fix whitespaces in `test/round_trip/hcl2_reconstructed/smoke.tf` --- test/round_trip/hcl2_reconstructed/smoke.tf | 8 +- test/round_trip/test_round_trip.py | 107 +++++++++----------- 2 files changed, 53 insertions(+), 62 deletions(-) diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf index b5c54e96..8f17d6d6 100644 --- a/test/round_trip/hcl2_reconstructed/smoke.tf +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -32,7 +32,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } @@ -52,12 +52,12 @@ block label1 label3 { block { route53_forwarding_rule_shares = { - for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : 
module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - + ] } ... if substr(bucket_name, 0, 1) == "l" } diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py index b43340b6..93fcd111 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/round_trip/test_round_trip.py @@ -26,12 +26,13 @@ ROUND_TRIP_DIR = Path(__file__).absolute().parent HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" +SPECIAL_DIR = ROUND_TRIP_DIR / "special" + _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", "json_serialized": ROUND_TRIP_DIR / "json_serialized", "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", - "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", } _STEP_SUFFIXES = { @@ -39,7 +40,6 @@ "hcl2_reconstructed": ".tf", "json_serialized": ".json", "json_reserialized": ".json", - "json_operator_precedence": ".json", } @@ -48,7 +48,6 @@ class SuiteStep(Enum): RECONSTRUCTED = "hcl2_reconstructed" JSON_SERIALIZED = "json_serialized" JSON_RESERIALIZED = "json_reserialized" - JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" def _get_suites() -> List[str]: @@ -109,20 +108,18 @@ class TestRoundTripSerialization(TestCase): def test_hcl_to_json(self): for suite in _get_suites(): - yield self.check_hcl_to_json, suite - - def check_hcl_to_json(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) - actual = _parse_and_serialize(hcl_path.read_text()) - expected = json.loads(json_path.read_text()) + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) - self.assertEqual( - actual, - expected, - f"HCL → JSON serialization mismatch for {suite}", - ) + 
self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for {suite}", + ) class TestRoundTripReserialization(TestCase): @@ -132,21 +129,19 @@ class TestRoundTripReserialization(TestCase): def test_json_reserialization(self): for suite in _get_suites(): - yield self.check_json_reserialization, suite + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) - def check_json_reserialization(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reserialize(serialized) - - expected = json.loads(json_reserialized_path.read_text()) - self.assertEqual( - actual, - expected, - f"JSON reserialization mismatch for {suite}", - ) + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) class TestRoundTripReconstruction(TestCase): @@ -156,21 +151,19 @@ class TestRoundTripReconstruction(TestCase): def test_json_to_hcl(self): for suite in _get_suites(): - yield self.check_json_to_hcl, suite - - def check_json_to_hcl(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reconstruct(serialized) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) - expected = 
hcl_reconstructed_path.read_text() - self.assertMultiLineEqual( - actual, - expected, - f"HCL reconstruction mismatch for {suite}", - ) + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) class TestRoundTripFull(TestCase): @@ -180,27 +173,25 @@ class TestRoundTripFull(TestCase): def test_full_round_trip(self): for suite in _get_suites(): - yield self.check_full_round_trip, suite - - def check_full_round_trip(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - original_hcl = hcl_path.read_text() + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() - # Forward: HCL → JSON - serialized = _parse_and_serialize(original_hcl) + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) - # Reconstruct: JSON → HCL - reconstructed_hcl = _deserialize_and_reconstruct(serialized) + # Reconstruct: JSON → HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) - # Re-parse: reconstructed HCL → JSON - reserialized = _parse_and_serialize(reconstructed_hcl) + # Reparse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) - self.assertEqual( - reserialized, - serialized, - f"Full round-trip mismatch for {suite}: " - f"HCL → JSON → HCL → JSON did not produce identical JSON", - ) + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) class TestOperatorPrecedence(TestCase): From a3fe3267dc0361d3cf78ab5d8bc201c0e53d90ab Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:39:02 +0100 Subject: [PATCH 19/24] remove files for WIP features --- hcl2/editor.py | 77 -------------- hcl2/processor.py | 258 ---------------------------------------------- 2 files changed, 335 deletions(-) delete mode 100644 hcl2/editor.py delete mode 
100644 hcl2/processor.py diff --git a/hcl2/editor.py b/hcl2/editor.py deleted file mode 100644 index 9efce08f..00000000 --- a/hcl2/editor.py +++ /dev/null @@ -1,77 +0,0 @@ -import dataclasses -from copy import copy, deepcopy -from typing import List, Optional, Set, Tuple - -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.base import BlockRule, StartRule - - -@dataclasses.dataclass -class TreePathElement: - - name: str - index: int = 0 - - -@dataclasses.dataclass -class TreePath: - - elements: List[TreePathElement] = dataclasses.field(default_factory=list) - - @classmethod - def build(cls, elements: List[Tuple[str, Optional[int]] | str]): - results = [] - for element in elements: - if isinstance(element, tuple): - if len(element) == 1: - result = TreePathElement(element[0], 0) - else: - result = TreePathElement(*element) - else: - result = TreePathElement(element, 0) - - results.append(result) - - return cls(results) - - def __iter__(self): - return self.elements.__iter__() - - def __len__(self): - return self.elements.__len__() - - -class Editor: - def __init__(self, rules_tree: LarkRule): - self.rules_tree = rules_tree - - @classmethod - def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: - return cls._find_all(rules_tree, path_element.name)[path_element.index] - - @classmethod - def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: - children = [] - print("rule", rules_tree) - print("rule children", rules_tree.children) - for child in rules_tree.children: - if isinstance(child, LarkRule) and child.lark_name() == rule_name: - children.append(child) - - return children - - def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: - path = deepcopy(path.elements) - - current_rule = self.rules_tree - while len(path) > 0: - current_path, *path = path - print(current_path, path) - current_rule = self._find_one(current_rule, current_path) - - return 
self._find_all(current_rule, rule_name) - - # def visit(self, path: TreePath) -> "Editor": - # - # while len(path) > 1: - # current = diff --git a/hcl2/processor.py b/hcl2/processor.py deleted file mode 100644 index b854aff5..00000000 --- a/hcl2/processor.py +++ /dev/null @@ -1,258 +0,0 @@ -from copy import copy, deepcopy -from typing import ( - List, - Optional, - Union, - Callable, - Any, - Tuple, - Generic, - TypeVar, - cast, - Generator, -) - -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule - -T = TypeVar("T", bound=LarkRule) - - -class RulesProcessor(Generic[T]): - """""" - - @classmethod - def _traverse( - cls, - node: T, - predicate: Callable[[T], bool], - current_depth: int = 0, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor"]: - - results = [] - - if predicate(node): - results.append(cls(node)) - - if max_depth is not None and current_depth >= max_depth: - return results - - for child in node.children: - if child is None or not isinstance(child, LarkRule): - continue - - child_results = cls._traverse( - child, - predicate, - current_depth + 1, - max_depth, - ) - results.extend(child_results) - - return results - - def __init__(self, node: LarkRule): - self.node = node - - @property - def siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children - - @property - def next_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[self.node.index + 1 :] - - @property - def previous_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[: self.node.index - 1] - - def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: - child_processors = [self.__class__(child) for child in self.node.children] - yield self, child_processors - for processor in 
child_processors: - if isinstance(processor.node, LarkRule): - for result in processor.walk(): - yield result - - def find_block( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> "RulesProcessor[BlockRule]": - return self.find_blocks(labels, exact_match, max_depth)[0] - - def find_blocks( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor[BlockRule]"]: - """ - Find blocks by their labels. - - Args: - labels: List of label strings to match - exact_match: If True, all labels must match exactly. If False, labels can be a subset. - max_depth: Maximum depth to search - - Returns: - ... - """ - - def block_predicate(node: LarkRule) -> bool: - if not isinstance(node, BlockRule): - return False - - node_labels = [label.serialize() for label in node.labels] - - if exact_match: - return node_labels == labels - else: - # Check if labels is a prefix of node_labels - if len(labels) > len(node_labels): - return False - return node_labels[: len(labels)] == labels - - return cast( - List[RulesProcessor[BlockRule]], - self._traverse(self.node, block_predicate, max_depth=max_depth), - ) - - def attribute( - self, name: str, max_depth: Optional[int] = None - ) -> "RulesProcessor[AttributeRule]": - return self.find_attributes(name, max_depth)[0] - - def find_attributes( - self, name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor[AttributeRule]"]: - """ - Find attributes by their identifier name. 
- - Args: - name: Attribute name to search for - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching attributes - """ - - def attribute_predicate(node: LarkRule) -> bool: - if not isinstance(node, AttributeRule): - return False - return node.identifier.serialize() == name - - return self._traverse(self.node, attribute_predicate, max_depth=max_depth) - - def rule(self, rule_name: str, max_depth: Optional[int] = None): - return self.find_rules(rule_name, max_depth)[0] - - def find_rules( - self, rule_name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules of a specific type. - - Args: - rule_name: Name of the rule type to find - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - - def rule_predicate(node: LarkRule) -> bool: - return node.lark_name() == rule_name - - return self._traverse(self.node, rule_predicate, max_depth=max_depth) - - def find_by_predicate( - self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules matching a custom predicate. 
- - Args: - predicate: Function that returns True for nodes to collect - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - return self._traverse(self.node, predicate, max_depth) - - # Convenience methods - def get_all_blocks(self, max_depth: Optional[int] = None) -> List: - """Get all blocks in the tree.""" - return self.find_rules("block", max_depth) - - def get_all_attributes( - self, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """Get all attributes in the tree.""" - return self.find_rules("attribute", max_depth) - - def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in reversed(self.previous_siblings): - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in self.next_siblings: - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def append_child( - self, new_node: LarkRule, indentation: bool = True - ) -> "RulesProcessor": - children = self.node.children - if indentation: - if isinstance(children[-1], NewLineOrCommentRule): - children.pop() - children.append(NewLineOrCommentRule.from_string("\n ")) - - new_node = deepcopy(new_node) - new_node.set_parent(self.node) - new_node.set_index(len(children)) - children.append(new_node) - return self.__class__(new_node) - - def replace(self, new_node: LarkRule) -> "RulesProcessor": - new_node = deepcopy(new_node) - - self.node.parent.children.pop(self.node.index) - self.node.parent.children.insert(self.node.index, new_node) - 
new_node.set_parent(self.node.parent) - new_node.set_index(self.node.index) - return self.__class__(new_node) - - # def insert_before(self, new_node: LarkRule) -> bool: - # """Insert a new node before this one.""" - # if self.parent is None or self.parent_index < 0: - # return False - # - # try: - # self.parent.children.insert(self.parent_index, new_node) - # except (IndexError, AttributeError): - # return False From 4054fc9627d70028f56124fc22c2c112ef4752f9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 19:30:21 +0100 Subject: [PATCH 20/24] add new unit tests, exclude some files from coverage report --- .coveragerc | 3 + test/helpers/__init__.py | 3 - test/helpers/hcl2_helper.py | 21 -- test/unit/__init__.py | 0 test/unit/rules/__init__.py | 0 test/unit/rules/test_abstract.py | 178 ++++++++++ test/unit/rules/test_containers.py | 396 +++++++++++++++++++++ test/unit/rules/test_expressions.py | 489 ++++++++++++++++++++++++++ test/unit/rules/test_literal_rules.py | 95 +++++ test/unit/rules/test_strings.py | 247 +++++++++++++ test/unit/rules/test_tokens.py | 162 +++++++++ test/unit/rules/test_whitespace.py | 135 +++++++ test/unit/test_utils.py | 148 ++++++++ 13 files changed, 1853 insertions(+), 24 deletions(-) delete mode 100644 test/helpers/__init__.py delete mode 100644 test/helpers/hcl2_helper.py create mode 100644 test/unit/__init__.py create mode 100644 test/unit/rules/__init__.py create mode 100644 test/unit/rules/test_abstract.py create mode 100644 test/unit/rules/test_containers.py create mode 100644 test/unit/rules/test_expressions.py create mode 100644 test/unit/rules/test_literal_rules.py create mode 100644 test/unit/rules/test_strings.py create mode 100644 test/unit/rules/test_tokens.py create mode 100644 test/unit/rules/test_whitespace.py create mode 100644 test/unit/test_utils.py diff --git a/.coveragerc b/.coveragerc index 4facabdc..30e6dc8c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,6 +3,9 @@ branch = true omit = 
hcl2/__main__.py hcl2/lark_parser.py + hcl2/version.py + hcl2/__init__.py + hcl2/rules/__init__.py [report] show_missing = true diff --git a/test/helpers/__init__.py b/test/helpers/__init__.py deleted file mode 100644 index ba33e308..00000000 --- a/test/helpers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Helper functions for tests -""" diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py deleted file mode 100644 index c39ee7fb..00000000 --- a/test/helpers/hcl2_helper.py +++ /dev/null @@ -1,21 +0,0 @@ -# pylint:disable=C0114,C0115,C0116 - -from lark import Tree - -from hcl2.parser import parser -from hcl2.dict_transformer import DictTransformer - - -class Hcl2Helper: - @classmethod - def load(cls, syntax: str) -> Tree: - return parser().parse(syntax) - - @classmethod - def load_to_dict(cls, syntax) -> dict: - tree = cls.load(syntax) - return DictTransformer().transform(tree) - - @classmethod - def build_argument(cls, identifier: str, expression: str = '"expression"') -> str: - return f"{identifier} = {expression}" diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/__init__.py b/test/unit/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py new file mode 100644 index 00000000..8803effc --- /dev/null +++ b/test/unit/rules/test_abstract.py @@ -0,0 +1,178 @@ +from unittest import TestCase + +from lark import Token, Tree +from lark.tree import Meta + +from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Concrete stubs for testing ABCs --- + + +class ConcreteToken(LarkToken): + @staticmethod + def lark_name() -> str: + return "TEST_TOKEN" + + @property + def serialize_conversion(self): + return str + + +class IntToken(LarkToken): + @staticmethod + def lark_name() -> str: + return 
"INT_TOKEN" + + @property + def serialize_conversion(self): + return int + + +class ConcreteRule(LarkRule): + @staticmethod + def lark_name() -> str: + return "test_rule" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +# --- Tests --- + + +class TestLarkToken(TestCase): + def test_init_stores_value(self): + token = ConcreteToken("hello") + self.assertEqual(token.value, "hello") + + def test_value_property(self): + token = ConcreteToken(42) + self.assertEqual(token.value, 42) + + def test_set_value(self): + token = ConcreteToken("old") + token.set_value("new") + self.assertEqual(token.value, "new") + + def test_str(self): + token = ConcreteToken("hello") + self.assertEqual(str(token), "hello") + + def test_str_numeric(self): + token = ConcreteToken(42) + self.assertEqual(str(token), "42") + + def test_repr(self): + token = ConcreteToken("hello") + self.assertEqual(repr(token), "") + + def test_to_lark_returns_token(self): + token = ConcreteToken("val") + lark_token = token.to_lark() + self.assertIsInstance(lark_token, Token) + self.assertEqual(lark_token.type, "TEST_TOKEN") + self.assertEqual(lark_token, "val") + + def test_serialize_uses_conversion(self): + token = ConcreteToken("hello") + self.assertEqual(token.serialize(), "hello") + + def test_serialize_int_conversion(self): + token = IntToken("42") + result = token.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + def test_lark_name(self): + self.assertEqual(ConcreteToken.lark_name(), "TEST_TOKEN") + + +class TestLarkRule(TestCase): + def test_init_sets_children(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertEqual(rule.children, [t1, t2]) + + def test_init_sets_parent_and_index(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertIs(t1._parent, rule) + self.assertIs(t2._parent, rule) + 
self.assertEqual(t1._index, 0) + self.assertEqual(t2._index, 1) + + def test_init_skips_none_children_for_parent_index(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + self.assertIs(t1._parent, rule) + self.assertEqual(t1._index, 1) + + def test_init_with_meta(self): + meta = Meta() + rule = ConcreteRule([], meta) + self.assertIs(rule._meta, meta) + + def test_init_without_meta(self): + rule = ConcreteRule([]) + self.assertIsNotNone(rule._meta) + + def test_parent_property(self): + child_rule = ConcreteRule([]) + parent_rule = ConcreteRule([child_rule]) + self.assertIs(child_rule.parent, parent_rule) + + def test_index_property(self): + child_rule = ConcreteRule([]) + ConcreteRule([child_rule]) + self.assertEqual(child_rule.index, 0) + + def test_children_property(self): + t = ConcreteToken("x") + rule = ConcreteRule([t]) + self.assertEqual(rule.children, [t]) + + def test_to_lark_builds_tree(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + tree = rule.to_lark() + self.assertIsInstance(tree, Tree) + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 2) + + def test_to_lark_skips_none_children(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + tree = rule.to_lark() + self.assertEqual(len(tree.children), 1) + self.assertEqual(tree.children[0], "a") + + def test_repr(self): + rule = ConcreteRule([]) + self.assertEqual(repr(rule), "") + + def test_nested_rules(self): + inner = ConcreteRule([ConcreteToken("x")]) + outer = ConcreteRule([inner]) + self.assertIs(inner.parent, outer) + tree = outer.to_lark() + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 1) + self.assertIsInstance(tree.children[0], Tree) + + +class TestLarkElement(TestCase): + def test_set_index(self): + token = ConcreteToken("x") + token.set_index(5) + self.assertEqual(token._index, 5) + + def test_set_parent(self): + token = ConcreteToken("x") + 
parent = ConcreteRule([]) + token.set_parent(parent) + self.assertIs(token._parent, parent) diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py new file mode 100644 index 00000000..b49b3f38 --- /dev/null +++ b/test/unit/rules/test_containers.py @@ -0,0 +1,396 @@ +from unittest import TestCase + +from hcl2.rules.containers import ( + TupleRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemRule, + ObjectRule, +) +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + LPAR, + RPAR, + DOT, + EQ, + COLON, + COMMA, + NAME, + DBLQUOTE, + STRING_CHARS, + IntLiteral, + FloatLiteral, +) +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & Helpers --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_object_elem_key(identifier_name): + return ObjectElemKeyRule([_make_identifier(identifier_name)]) + + +def _make_object_elem(key_name, expr_value, sep=None): + key = _make_object_elem_key(key_name) + separator = sep or EQ() + return ObjectElemRule([key, separator, StubExpression(expr_value)]) + + +# --- TupleRule tests --- 
+ + +class TestTupleRule(TestCase): + def test_lark_name(self): + self.assertEqual(TupleRule.lark_name(), "tuple") + + def test_elements_empty_tuple(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + expr = StubExpression(1) + rule = TupleRule([LSQB(), expr, RSQB()]) + self.assertEqual(rule.elements, [expr]) + + def test_elements_multiple(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + e3 = StubExpression(3) + rule = TupleRule([LSQB(), e1, COMMA(), e2, COMMA(), e3, RSQB()]) + self.assertEqual(rule.elements, [e1, e2, e3]) + + def test_elements_skips_non_expressions(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + nlc = _make_nlc("\n") + rule = TupleRule([LSQB(), nlc, e1, COMMA(), nlc, e2, RSQB()]) + self.assertEqual(len(rule.elements), 2) + + def test_serialize_default_returns_list(self): + rule = TupleRule( + [LSQB(), StubExpression(1), COMMA(), StubExpression(2), RSQB()] + ) + result = rule.serialize() + self.assertEqual(result, [1, 2]) + + def test_serialize_empty_returns_empty_list(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.serialize(), []) + + def test_serialize_single_element(self): + rule = TupleRule([LSQB(), StubExpression(42), RSQB()]) + self.assertEqual(rule.serialize(), [42]) + + def test_serialize_wrap_tuples(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[a, b]}") + + def test_serialize_wrap_tuples_empty(self): + rule = TupleRule([LSQB(), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[]}") + + def test_serialize_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("a"), RSQB()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + 
# Inside dollar string forces string representation + self.assertEqual(result, "[a]") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "[a, b]") + + def test_serialize_wrap_tuples_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("x"), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + # Already inside $, so no extra wrapping + self.assertEqual(result, "[x]") + + +# --- ObjectElemKeyRule tests --- + + +class TestObjectElemKeyRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemKeyRule.lark_name(), "object_elem_key") + + def test_value_property_identifier(self): + ident = _make_identifier("foo") + rule = ObjectElemKeyRule([ident]) + self.assertIs(rule.value, ident) + + def test_serialize_identifier(self): + rule = ObjectElemKeyRule([_make_identifier("my_key")]) + self.assertEqual(rule.serialize(), "my_key") + + def test_serialize_int_lit(self): + rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) + self.assertEqual(rule.serialize(), 5) + + def test_serialize_float_lit(self): + rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) + self.assertAlmostEqual(rule.serialize(), 3.14) + + def test_serialize_string(self): + rule = ObjectElemKeyRule([_make_string_rule("k3")]) + self.assertEqual(rule.serialize(), '"k3"') + + +# --- ObjectElemKeyExpressionRule tests --- + + +class TestObjectElemKeyExpressionRule(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" + ) + + def test_expression_property(self): + expr = StubExpression("5 + 5") + rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) 
+ self.assertIs(rule.expression, expr) + + def test_serialize(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + result = rule.serialize() + self.assertEqual(result, "${(5 + 5)}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "(5 + 5)") + + +# --- ObjectElemKeyDotAccessor tests --- + + +class TestObjectElemKeyDotAccessor(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" + ) + + def test_identifiers_property(self): + i1 = _make_identifier("k5") + i2 = _make_identifier("attr") + i3 = _make_identifier("sub") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) + idents = rule.identifiers + self.assertEqual(len(idents), 3) + self.assertIs(idents[0], i1) + self.assertIs(idents[1], i2) + self.assertIs(idents[2], i3) + + def test_identifiers_two_segments(self): + i1 = _make_identifier("a") + i2 = _make_identifier("b") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) + self.assertEqual(len(rule.identifiers), 2) + + def test_serialize(self): + rule = ObjectElemKeyDotAccessor( + [ + _make_identifier("k5"), + DOT(), + _make_identifier("attr"), + DOT(), + _make_identifier("sub"), + ] + ) + self.assertEqual(rule.serialize(), "k5.attr.sub") + + def test_serialize_two_segments(self): + rule = ObjectElemKeyDotAccessor( + [_make_identifier("a"), DOT(), _make_identifier("b")] + ) + self.assertEqual(rule.serialize(), "a.b") + + +# --- ObjectElemRule tests --- + + +class TestObjectElemRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemRule.lark_name(), "object_elem") + + def test_key_property(self): + key = _make_object_elem_key("foo") + rule = ObjectElemRule([key, EQ(), StubExpression("bar")]) + self.assertIs(rule.key, key) + + def 
test_expression_property(self): + expr = StubExpression("bar") + rule = ObjectElemRule([_make_object_elem_key("foo"), EQ(), expr]) + self.assertIs(rule.expression, expr) + + def test_serialize_with_eq(self): + rule = _make_object_elem("name", "value") + self.assertEqual(rule.serialize(), {"name": "value"}) + + def test_serialize_with_colon(self): + rule = ObjectElemRule([_make_object_elem_key("k"), COLON(), StubExpression(42)]) + self.assertEqual(rule.serialize(), {"k": 42}) + + def test_serialize_int_value(self): + rule = _make_object_elem("count", 5) + self.assertEqual(rule.serialize(), {"count": 5}) + + def test_serialize_string_key(self): + key = ObjectElemKeyRule([_make_string_rule("quoted")]) + rule = ObjectElemRule([key, EQ(), StubExpression("val")]) + self.assertEqual(rule.serialize(), {'"quoted"': "val"}) + + +# --- ObjectRule tests --- + + +class TestObjectRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectRule.lark_name(), "object") + + def test_elements_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + elem = _make_object_elem("k", "v") + rule = ObjectRule([LBRACE(), elem, RBRACE()]) + self.assertEqual(rule.elements, [elem]) + + def test_elements_multiple(self): + e1 = _make_object_elem("a", 1) + e2 = _make_object_elem("b", 2) + rule = ObjectRule([LBRACE(), e1, e2, RBRACE()]) + self.assertEqual(rule.elements, [e1, e2]) + + def test_elements_skips_non_elem(self): + e1 = _make_object_elem("a", 1) + nlc = _make_nlc("\n") + rule = ObjectRule([LBRACE(), nlc, e1, nlc, RBRACE()]) + self.assertEqual(rule.elements, [e1]) + + def test_serialize_default_returns_dict(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + result = rule.serialize() + self.assertEqual(result, {"k1": "v1", "k2": "v2"}) + + def test_serialize_empty_returns_empty_dict(self): + rule = ObjectRule([LBRACE(), 
RBRACE()]) + self.assertEqual(rule.serialize(), {}) + + def test_serialize_single_element(self): + rule = ObjectRule([LBRACE(), _make_object_elem("x", 42), RBRACE()]) + self.assertEqual(rule.serialize(), {"x": 42}) + + def test_serialize_wrap_objects(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + # Result is "{k1 = v1, k2 = v2}" wrapped in ${}, giving ${{...}} + self.assertEqual(result, "${{k1 = v1, k2 = v2}}") + + def test_serialize_wrap_objects_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${{}}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string forces string representation + self.assertEqual(result, "{k = v}") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("a", 1), + _make_object_elem("b", 2), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "{a = 1, b = 2}") + + def test_serialize_wrap_objects_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "{k = v}") diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py new file mode 100644 index 00000000..16800ed0 --- /dev/null +++ b/test/unit/rules/test_expressions.py @@ -0,0 
+1,489 @@ +from unittest import TestCase + +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ( + ExpressionRule, + ExprTermRule, + ConditionalRule, + BinaryTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.tokens import ( + LPAR, + RPAR, + QMARK, + COLON, + BINARY_OP, + NAME, + StringToken, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value, children=None): + self._stub_value = value + super().__init__(children or [], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +class NonExpressionRule(LarkRule): + """A rule that is NOT an ExpressionRule, for parent-chain tests.""" + + @staticmethod + def lark_name(): + return "non_expression" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "non_expr" + + +def _make_expr_term(value): + """Build ExprTermRule wrapping a StubExpression (no parens).""" + return ExprTermRule([StubExpression(value)]) + + +def _make_paren_expr_term(value): + """Build ExprTermRule wrapping a StubExpression in parentheses.""" + return ExprTermRule([LPAR(), StubExpression(value), RPAR()]) + + +def _make_binary_operator(op_str): + """Build BinaryOperatorRule for an operator string.""" + return BinaryOperatorRule([BINARY_OP(op_str)]) + + +def _make_binary_term(op_str, rhs_value): + """Build BinaryTermRule with given operator and RHS value.""" + return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) + + +MINUS_TOKEN = StringToken["MINUS"] +NOT_TOKEN = StringToken["NOT"] + + +# --- ExprTermRule tests --- + + +class TestExprTermRule(TestCase): + def test_lark_name(self): + 
self.assertEqual(ExprTermRule.lark_name(), "expr_term") + + def test_construction_without_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertFalse(rule.parentheses) + + def test_construction_without_parens_children_structure(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + # children: [None, None, stub, None, None] + self.assertEqual(len(rule.children), 5) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIsNone(rule.children[4]) + + def test_construction_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertTrue(rule.parentheses) + + def test_construction_with_parens_children_structure(self): + stub = StubExpression("a") + lpar = LPAR() + rpar = RPAR() + rule = ExprTermRule([lpar, stub, rpar]) + # children: [LPAR, None, stub, None, RPAR] + self.assertEqual(len(rule.children), 5) + self.assertIs(rule.children[0], lpar) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIs(rule.children[4], rpar) + + def test_expression_property(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertIs(rule.expression, stub) + + def test_expression_property_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertIs(rule.expression, stub) + + def test_serialize_no_parens_delegates_to_inner(self): + rule = _make_expr_term("hello") + self.assertEqual(rule.serialize(), "hello") + + def test_serialize_no_parens_passes_through_int(self): + stub = StubExpression(42) + rule = ExprTermRule([stub]) + self.assertEqual(rule.serialize(), 42) + + def test_serialize_with_parens_wraps_and_dollar(self): + rule = _make_paren_expr_term("a") + result = rule.serialize() + self.assertEqual(result, "${(a)}") + + def 
test_serialize_with_parens_inside_dollar_string(self): + rule = _make_paren_expr_term("a") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string: wraps in () but NOT in ${} + self.assertEqual(result, "(a)") + + def test_serialize_sets_inside_parentheses_context(self): + """When parenthesized, inner expression should see inside_parentheses=True.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([LPAR(), ContextCapture([]), RPAR()]) + rule.serialize() + self.assertTrue(seen_context["inside_parentheses"]) + + def test_serialize_no_parens_preserves_inside_parentheses(self): + """Without parens, inside_parentheses passes through from caller context.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([ContextCapture([])]) + rule.serialize(context=SerializationContext(inside_parentheses=False)) + self.assertFalse(seen_context["inside_parentheses"]) + + +# --- ConditionalRule tests --- + + +class TestConditionalRule(TestCase): + def _make_conditional(self, cond_val="cond", true_val="yes", false_val="no"): + return ConditionalRule( + [ + StubExpression(cond_val), + QMARK(), + StubExpression(true_val), + COLON(), + StubExpression(false_val), + ] + ) + + def test_lark_name(self): + self.assertEqual(ConditionalRule.lark_name(), "conditional") + + def test_construction_inserts_optional_slots(self): + rule = self._make_conditional() + # Should have 8 children after _insert_optionals at [2, 4, 6] + self.assertEqual(len(rule.children), 8) + + def test_condition_property(self): + cond = 
StubExpression("cond") + rule = ConditionalRule( + [cond, QMARK(), StubExpression("t"), COLON(), StubExpression("f")] + ) + self.assertIs(rule.condition, cond) + + def test_if_true_property(self): + true_expr = StubExpression("yes") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + true_expr, + COLON(), + StubExpression("f"), + ] + ) + self.assertIs(rule.if_true, true_expr) + + def test_if_false_property(self): + false_expr = StubExpression("no") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + StubExpression("t"), + COLON(), + false_expr, + ] + ) + self.assertIs(rule.if_false, false_expr) + + def test_serialize_format(self): + rule = self._make_conditional("a", "b", "c") + result = rule.serialize() + self.assertEqual(result, "${a ? b : c}") + + def test_serialize_wraps_in_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + result = rule.serialize() + self.assertTrue(result.startswith("${")) + self.assertTrue(result.endswith("}")) + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "x ? y : z") + + def test_serialize_force_parens_no_parent(self): + """force_operation_parentheses with no parent → no wrapping.""" + rule = self._make_conditional("a", "b", "c") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + # No parent, so _wrap_into_parentheses returns unchanged + self.assertEqual(result, "${a ? 
b : c}") + + def test_serialize_force_parens_with_expression_parent(self): + """force_operation_parentheses with ExpressionRule parent → wraps.""" + rule = self._make_conditional("a", "b", "c") + # Nest inside another expression to set parent + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a ? b : c)}") + + +# --- BinaryTermRule tests --- + + +class TestBinaryTermRule(TestCase): + def test_lark_name(self): + self.assertEqual(BinaryTermRule.lark_name(), "binary_term") + + def test_construction_inserts_optional(self): + rule = _make_binary_term("+", "b") + # [BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 3) + self.assertIsNone(rule.children[1]) + + def test_binary_operator_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.binary_operator, op) + + def test_expr_term_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.expr_term, rhs) + + def test_serialize(self): + rule = _make_binary_term("+", "b") + result = rule.serialize() + self.assertEqual(result, "+ b") + + def test_serialize_equals_operator(self): + rule = _make_binary_term("==", "x") + self.assertEqual(rule.serialize(), "== x") + + def test_serialize_and_operator(self): + rule = _make_binary_term("&&", "y") + self.assertEqual(rule.serialize(), "&& y") + + +# --- BinaryOpRule tests --- + + +class TestBinaryOpRule(TestCase): + def _make_binary_op(self, lhs_val, op_str, rhs_val): + lhs = _make_expr_term(lhs_val) + bt = _make_binary_term(op_str, rhs_val) + return BinaryOpRule([lhs, bt, None]) + + def test_lark_name(self): + self.assertEqual(BinaryOpRule.lark_name(), "binary_op") + + def test_expr_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = 
BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.expr_term, lhs) + + def test_binary_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.binary_term, bt) + + def test_serialize_addition(self): + rule = self._make_binary_op("a", "+", "b") + self.assertEqual(rule.serialize(), "${a + b}") + + def test_serialize_equality(self): + rule = self._make_binary_op("x", "==", "y") + self.assertEqual(rule.serialize(), "${x == y}") + + def test_serialize_and(self): + rule = self._make_binary_op("p", "&&", "q") + self.assertEqual(rule.serialize(), "${p && q}") + + def test_serialize_multiply(self): + rule = self._make_binary_op("a", "*", "b") + self.assertEqual(rule.serialize(), "${a * b}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_binary_op("a", "+", "b") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "a + b") + + def test_serialize_force_parens_no_parent(self): + """No parent → _wrap_into_parentheses returns unchanged.""" + rule = self._make_binary_op("a", "+", "b") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${a + b}") + + def test_serialize_force_parens_with_expression_parent(self): + """With ExpressionRule parent → wraps in parens.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a + b)}") + + def test_serialize_force_parens_inside_dollar_string_with_parent(self): + """Inside dollar string + parent → parens without extra ${}.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + ctx = 
SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "(a + b)") + + +# --- UnaryOpRule tests --- + + +class TestUnaryOpRule(TestCase): + def _make_unary(self, op_str, operand_val): + token_cls = MINUS_TOKEN if op_str == "-" else NOT_TOKEN + token = token_cls(op_str) + expr_term = _make_expr_term(operand_val) + return UnaryOpRule([token, expr_term]) + + def test_lark_name(self): + self.assertEqual(UnaryOpRule.lark_name(), "unary_op") + + def test_operator_property_minus(self): + rule = self._make_unary("-", "x") + self.assertEqual(rule.operator, "-") + + def test_operator_property_not(self): + rule = self._make_unary("!", "x") + self.assertEqual(rule.operator, "!") + + def test_expr_term_property(self): + expr_term = _make_expr_term("x") + token = MINUS_TOKEN("-") + rule = UnaryOpRule([token, expr_term]) + self.assertIs(rule.expr_term, expr_term) + + def test_serialize_minus(self): + rule = self._make_unary("-", "a") + self.assertEqual(rule.serialize(), "${-a}") + + def test_serialize_not(self): + rule = self._make_unary("!", "flag") + self.assertEqual(rule.serialize(), "${!flag}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_unary("-", "x") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "-x") + + def test_serialize_force_parens_no_parent(self): + rule = self._make_unary("-", "x") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${-x}") + + def test_serialize_force_parens_with_expression_parent(self): + rule = self._make_unary("-", "x") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(-x)}") + + +# --- ExpressionRule._wrap_into_parentheses tests --- + + +class 
TestWrapIntoParenthesesMethod(TestCase): + def test_returns_unchanged_when_inside_parentheses(self): + expr = StubExpression("test") + ctx = SerializationContext(inside_parentheses=True) + result = expr._wrap_into_parentheses("${x}", context=ctx) + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_no_parent(self): + expr = StubExpression("test") + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_parent_not_expression(self): + expr = StubExpression("test") + NonExpressionRule([expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_wraps_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_wraps_plain_string_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("a + b") + self.assertEqual(result, "(a + b)") + + def test_expr_term_parent_with_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is ExpressionRule → wraps.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + # inner is now at expr_term._children[2], parent=expr_term + StubExpression("grandparent", children=[expr_term]) + # expr_term.parent = grandparent (ExpressionRule) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_expr_term_parent_with_non_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is NOT ExpressionRule → no wrap.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + NonExpressionRule([expr_term]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_expr_term_parent_with_no_grandparent(self): + """Parent is ExprTermRule with no parent → no wrap.""" + 
inner = StubExpression("test") + ExprTermRule([inner]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py new file mode 100644 index 00000000..f6b8b94c --- /dev/null +++ b/test/unit/rules/test_literal_rules.py @@ -0,0 +1,95 @@ +from unittest import TestCase + +from hcl2.rules.literal_rules import ( + TokenRule, + KeywordRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + BinaryOperatorRule, +) +from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral + + +class TestKeywordRule(TestCase): + def test_lark_name(self): + self.assertEqual(KeywordRule.lark_name(), "keyword") + + def test_token_property(self): + token = NAME("true") + rule = KeywordRule([token]) + self.assertIs(rule.token, token) + + def test_serialize(self): + rule = KeywordRule([NAME("true")]) + self.assertEqual(rule.serialize(), "true") + + +class TestIdentifierRule(TestCase): + def test_lark_name(self): + self.assertEqual(IdentifierRule.lark_name(), "identifier") + + def test_serialize(self): + rule = IdentifierRule([NAME("my_var")]) + self.assertEqual(rule.serialize(), "my_var") + + def test_token_property(self): + token = NAME("foo") + rule = IdentifierRule([token]) + self.assertIs(rule.token, token) + + +class TestIntLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(IntLitRule.lark_name(), "int_lit") + + def test_serialize_returns_int(self): + rule = IntLitRule([IntLiteral("42")]) + result = rule.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + +class TestFloatLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(FloatLitRule.lark_name(), "float_lit") + + def test_serialize_returns_float(self): + rule = FloatLitRule([FloatLiteral("3.14")]) + result = rule.serialize() + self.assertAlmostEqual(result, 3.14) + self.assertIsInstance(result, float) + + +class TestBinaryOperatorRule(TestCase): + def 
test_lark_name(self): + self.assertEqual(BinaryOperatorRule.lark_name(), "binary_operator") + + def test_serialize_plus(self): + rule = BinaryOperatorRule([BINARY_OP("+")]) + self.assertEqual(rule.serialize(), "+") + + def test_serialize_equals(self): + rule = BinaryOperatorRule([BINARY_OP("==")]) + self.assertEqual(rule.serialize(), "==") + + def test_serialize_and(self): + rule = BinaryOperatorRule([BINARY_OP("&&")]) + self.assertEqual(rule.serialize(), "&&") + + def test_serialize_or(self): + rule = BinaryOperatorRule([BINARY_OP("||")]) + self.assertEqual(rule.serialize(), "||") + + def test_serialize_gt(self): + rule = BinaryOperatorRule([BINARY_OP(">")]) + self.assertEqual(rule.serialize(), ">") + + def test_serialize_multiply(self): + rule = BinaryOperatorRule([BINARY_OP("*")]) + self.assertEqual(rule.serialize(), "*") + + def test_token_property(self): + token = BINARY_OP("+") + rule = BinaryOperatorRule([token]) + self.assertIs(rule.token, token) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py new file mode 100644 index 00000000..67fec075 --- /dev/null +++ b/test/unit/rules/test_strings.py @@ -0,0 +1,247 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.strings import ( + InterpolationRule, + StringPartRule, + StringRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +# --- Helpers --- + + +def 
_make_string_part_chars(text): + return StringPartRule([STRING_CHARS(text)]) + + +def _make_string_part_escaped(text): + return StringPartRule([ESCAPED_INTERPOLATION(text)]) + + +def _make_string_part_interpolation(expr_value): + interp = InterpolationRule([INTERP_START(), StubExpression(expr_value), RBRACE()]) + return StringPartRule([interp]) + + +def _make_string(parts): + """Build StringRule from a list of StringPartRule children.""" + return StringRule([DBLQUOTE(), *parts, DBLQUOTE()]) + + +# --- InterpolationRule tests --- + + +class TestInterpolationRule(TestCase): + def test_lark_name(self): + self.assertEqual(InterpolationRule.lark_name(), "interpolation") + + def test_expression_property(self): + expr = StubExpression("var.name") + rule = InterpolationRule([INTERP_START(), expr, RBRACE()]) + self.assertIs(rule.expression, expr) + + def test_serialize_wraps_in_dollar_string(self): + rule = InterpolationRule([INTERP_START(), StubExpression("var.name"), RBRACE()]) + self.assertEqual(rule.serialize(), "${var.name}") + + def test_serialize_idempotent_if_already_dollar(self): + rule = InterpolationRule([INTERP_START(), StubExpression("${x}"), RBRACE()]) + self.assertEqual(rule.serialize(), "${x}") + + def test_serialize_expression_result(self): + rule = InterpolationRule([INTERP_START(), StubExpression("a + b"), RBRACE()]) + self.assertEqual(rule.serialize(), "${a + b}") + + +# --- StringPartRule tests --- + + +class TestStringPartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringPartRule.lark_name(), "string_part") + + def test_content_property_string_chars(self): + token = STRING_CHARS("hello") + rule = StringPartRule([token]) + self.assertIs(rule.content, token) + + def test_serialize_string_chars(self): + rule = _make_string_part_chars("hello world") + self.assertEqual(rule.serialize(), "hello world") + + def test_serialize_escaped_interpolation(self): + rule = _make_string_part_escaped("$${aws:username}") + 
self.assertEqual(rule.serialize(), "$${aws:username}") + + def test_serialize_interpolation(self): + rule = _make_string_part_interpolation("var.name") + self.assertEqual(rule.serialize(), "${var.name}") + + def test_content_property_interpolation(self): + interp = InterpolationRule([INTERP_START(), StubExpression("x"), RBRACE()]) + rule = StringPartRule([interp]) + self.assertIs(rule.content, interp) + + +# --- StringRule tests --- + + +class TestStringRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringRule.lark_name(), "string") + + def test_string_parts_property(self): + p1 = _make_string_part_chars("hello") + p2 = _make_string_part_chars(" world") + rule = _make_string([p1, p2]) + self.assertEqual(rule.string_parts, [p1, p2]) + + def test_string_parts_empty(self): + rule = _make_string([]) + self.assertEqual(rule.string_parts, []) + + def test_serialize_plain_string(self): + rule = _make_string([_make_string_part_chars("hello")]) + self.assertEqual(rule.serialize(), '"hello"') + + def test_serialize_empty_string(self): + rule = _make_string([]) + self.assertEqual(rule.serialize(), '""') + + def test_serialize_concatenated_parts(self): + rule = _make_string( + [ + _make_string_part_chars("prefix:"), + _make_string_part_interpolation("var.name"), + _make_string_part_chars("-suffix"), + ] + ) + self.assertEqual(rule.serialize(), '"prefix:${var.name}-suffix"') + + def test_serialize_escaped_and_interpolation(self): + rule = _make_string( + [ + _make_string_part_interpolation("bar"), + _make_string_part_escaped("$${baz:bat}"), + ] + ) + self.assertEqual(rule.serialize(), '"${bar}$${baz:bat}"') + + def test_serialize_only_interpolation(self): + rule = _make_string([_make_string_part_interpolation("x")]) + self.assertEqual(rule.serialize(), '"${x}"') + + +# --- HeredocTemplateRule tests --- + + +class TestHeredocTemplateRule(TestCase): + def test_lark_name(self): + self.assertEqual(HeredocTemplateRule.lark_name(), "heredoc_template") + + def 
test_heredoc_property(self): + token = HEREDOC_TEMPLATE("< str: + return "test_inline" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +def _make_nlc(text): + """Helper: build NewLineOrCommentRule from a string.""" + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +# --- Tests --- + + +class TestNewLineOrCommentRule(TestCase): + def test_lark_name(self): + self.assertEqual(NewLineOrCommentRule.lark_name(), "new_line_or_comment") + + def test_serialize_newline(self): + rule = _make_nlc("\n") + self.assertEqual(rule.serialize(), "\n") + + def test_serialize_line_comment(self): + rule = _make_nlc("// this is a comment\n") + self.assertEqual(rule.serialize(), "// this is a comment\n") + + def test_serialize_hash_comment(self): + rule = _make_nlc("# hash comment\n") + self.assertEqual(rule.serialize(), "# hash comment\n") + + def test_to_list_bare_newline_returns_none(self): + rule = _make_nlc("\n") + self.assertIsNone(rule.to_list()) + + def test_to_list_line_comment(self): + rule = _make_nlc("// my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_hash_comment(self): + rule = _make_nlc("# my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_block_comment(self): + rule = _make_nlc("/* block comment */\n") + result = rule.to_list() + self.assertEqual(result, ["block comment"]) + + def test_to_list_multiple_comments(self): + rule = _make_nlc("// first\n// second\n") + result = rule.to_list() + self.assertIn("first", result) + self.assertIn("second", result) + + def test_token_property(self): + token = NL_OR_COMMENT("\n") + rule = NewLineOrCommentRule([token]) + self.assertIs(rule.token, token) + + +class TestInlineCommentMixIn(TestCase): + def test_insert_optionals_inserts_none_where_no_comment(self): + from hcl2.rules.tokens import NAME + + token = NAME("x") + children = [token, NAME("y")] + mixin = 
ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should have inserted None at index 1, pushing NAME("y") to index 2 + self.assertIsNone(children[1]) + self.assertEqual(len(children), 3) + + def test_insert_optionals_leaves_comment_in_place(self): + comment = _make_nlc("// comment\n") + from hcl2.rules.tokens import NAME + + children = [NAME("x"), comment] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should NOT insert None since index 1 is already a NewLineOrCommentRule + self.assertIs(children[1], comment) + self.assertEqual(len(children), 2) + + def test_insert_optionals_handles_index_error(self): + children = [_make_nlc("\n")] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [3]) + # Should insert None at index 3 + self.assertEqual(len(children), 2) + self.assertIsNone(children[1]) + + def test_inline_comments_collects_from_children(self): + comment = _make_nlc("// hello\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), comment]) + result = rule.inline_comments() + self.assertEqual(result, ["hello"]) + + def test_inline_comments_skips_bare_newlines(self): + newline = _make_nlc("\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), newline]) + result = rule.inline_comments() + self.assertEqual(result, []) + + def test_inline_comments_recursive(self): + comment = _make_nlc("// inner\n") + inner = ConcreteInlineComment([comment]) + outer = ConcreteInlineComment([inner]) + result = outer.inline_comments() + self.assertEqual(result, ["inner"]) + + def test_inline_comments_empty(self): + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x")]) + result = rule.inline_comments() + self.assertEqual(result, []) diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py new file mode 100644 index 00000000..f5f94e8c --- 
/dev/null +++ b/test/unit/test_utils.py @@ -0,0 +1,148 @@ +from unittest import TestCase + +from hcl2.utils import ( + SerializationOptions, + SerializationContext, + is_dollar_string, + to_dollar_string, + unwrap_dollar_string, + wrap_into_parentheses, +) + + +class TestSerializationOptions(TestCase): + def test_default_values(self): + opts = SerializationOptions() + self.assertTrue(opts.with_comments) + self.assertFalse(opts.with_meta) + self.assertFalse(opts.wrap_objects) + self.assertFalse(opts.wrap_tuples) + self.assertTrue(opts.explicit_blocks) + self.assertTrue(opts.preserve_heredocs) + self.assertFalse(opts.force_operation_parentheses) + + def test_custom_values(self): + opts = SerializationOptions( + with_comments=False, + with_meta=True, + force_operation_parentheses=True, + ) + self.assertFalse(opts.with_comments) + self.assertTrue(opts.with_meta) + self.assertTrue(opts.force_operation_parentheses) + + +class TestSerializationContext(TestCase): + def test_default_values(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_replace_returns_new_instance(self): + ctx = SerializationContext() + new_ctx = ctx.replace(inside_dollar_string=True) + self.assertIsNot(ctx, new_ctx) + self.assertFalse(ctx.inside_dollar_string) + self.assertTrue(new_ctx.inside_dollar_string) + + def test_modify_mutates_and_restores(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + + with ctx.modify(inside_dollar_string=True): + self.assertTrue(ctx.inside_dollar_string) + + self.assertFalse(ctx.inside_dollar_string) + + def test_modify_restores_on_exception(self): + ctx = SerializationContext() + + with self.assertRaises(ValueError): + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + raise ValueError("test") + + self.assertFalse(ctx.inside_dollar_string) + 
self.assertFalse(ctx.inside_parentheses) + + def test_modify_multiple_fields(self): + ctx = SerializationContext() + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_copy_yields_independent_copy(self): + ctx = SerializationContext() + with ctx.copy(inside_dollar_string=True) as copied: + self.assertTrue(copied.inside_dollar_string) + self.assertFalse(ctx.inside_dollar_string) + self.assertIsNot(ctx, copied) + + +class TestIsDollarString(TestCase): + def test_valid_dollar_string(self): + self.assertTrue(is_dollar_string("${x}")) + + def test_nested_dollar_string(self): + self.assertTrue(is_dollar_string("${a + b}")) + + def test_plain_string(self): + self.assertFalse(is_dollar_string("foo")) + + def test_incomplete_prefix(self): + self.assertFalse(is_dollar_string("${")) + + def test_non_string_input(self): + self.assertFalse(is_dollar_string(42)) + self.assertFalse(is_dollar_string(None)) + + def test_empty_dollar_string(self): + self.assertTrue(is_dollar_string("${}")) + + def test_dollar_without_brace(self): + self.assertFalse(is_dollar_string("$x}")) + + def test_missing_closing_brace(self): + self.assertFalse(is_dollar_string("${x")) + + +class TestToDollarString(TestCase): + def test_wraps_plain_string(self): + self.assertEqual(to_dollar_string("x"), "${x}") + + def test_idempotent_on_dollar_string(self): + self.assertEqual(to_dollar_string("${x}"), "${x}") + + def test_wraps_empty(self): + self.assertEqual(to_dollar_string(""), "${}") + + def test_wraps_expression(self): + self.assertEqual(to_dollar_string("a + b"), "${a + b}") + + +class TestUnwrapDollarString(TestCase): + def test_strips_wrapping(self): + self.assertEqual(unwrap_dollar_string("${x}"), "x") + + def test_noop_on_plain_string(self): + self.assertEqual(unwrap_dollar_string("foo"), "foo") + + 
def test_strips_complex_expression(self): + self.assertEqual(unwrap_dollar_string("${a + b}"), "a + b") + + +class TestWrapIntoParentheses(TestCase): + def test_plain_string(self): + self.assertEqual(wrap_into_parentheses("x"), "(x)") + + def test_dollar_string(self): + self.assertEqual(wrap_into_parentheses("${x}"), "${(x)}") + + def test_expression_string(self): + self.assertEqual(wrap_into_parentheses("a + b"), "(a + b)") + + def test_dollar_expression(self): + self.assertEqual(wrap_into_parentheses("${a + b}"), "${(a + b)}") From 7662a5e039db786e9400531df0516154e02de666 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:33 +0100 Subject: [PATCH 21/24] rewrite api.py, update builder.py, add unit tests for them --- hcl2/__init__.py | 13 +- hcl2/__main__.py | 4 +- hcl2/api.py | 219 ++++++++++++++++++++++++++++------ hcl2/builder.py | 17 ++- hcl2/deserializer.py | 4 +- test/unit/test_api.py | 244 ++++++++++++++++++++++++++++++++++++++ test/unit/test_builder.py | 157 ++++++++++++++++++++++++ 7 files changed, 607 insertions(+), 51 deletions(-) create mode 100644 test/unit/test_api.py create mode 100644 test/unit/test_builder.py diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 2d5dad09..d3a9ea7b 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -8,10 +8,21 @@ from .api import ( load, loads, + dump, + dumps, parse, parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, transform, - writes, + serialize, ) from .builder import Builder +from .deserializer import DeserializerOptions +from .formatter import FormatterOptions +from .rules.base import StartRule +from .utils import SerializationOptions diff --git a/hcl2/__main__.py b/hcl2/__main__.py index 17a021e1..f1a58938 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -19,6 +19,7 @@ from lark import UnexpectedCharacters, UnexpectedToken from . 
import load +from .utils import SerializationOptions from .version import __version__ @@ -58,7 +59,8 @@ def main(): else open(args.OUT_PATH, "w", encoding="utf-8") ) print(args.PATH, file=sys.stderr, flush=True) - json.dump(load(in_file, with_meta=args.with_meta), out_file) + options = SerializationOptions(with_meta=True) if args.with_meta else None + json.dump(load(in_file, serialization_options=options), out_file) if args.OUT_PATH is None: out_file.write("\n") out_file.close() diff --git a/hcl2/api.py b/hcl2/api.py index 7c384c53..0238f418 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -1,60 +1,205 @@ -"""The API that will be exposed to users of this package""" -from typing import TextIO +"""The API that will be exposed to users of this package. + +Follows the json module convention: load/loads for reading, dump/dumps for writing. +Also exposes intermediate pipeline stages for advanced usage. +""" + +import json as _json +from typing import TextIO, Optional from lark.tree import Tree -from hcl2.parser import parser + +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter, FormatterOptions +from hcl2.parser import parser as _get_parser from hcl2.reconstructor import HCLReconstructor +from hcl2.rules.base import StartRule from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + + +# --------------------------------------------------------------------------- +# Primary API: load / loads / dump / dumps +# --------------------------------------------------------------------------- + + +def load( + file: TextIO, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load a HCL2 file and return a Python dict. + + :param file: File with HCL2 content. + :param serialization_options: Options controlling serialization behavior. 
+ """ + return loads(file.read(), serialization_options=serialization_options) + + +def loads( + text: str, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load HCL2 from a string and return a Python dict. + :param text: HCL2 text. + :param serialization_options: Options controlling serialization behavior. + """ + tree = parses(text) + return serialize(tree, serialization_options=serialization_options) + + +def dump( + data: dict, + file: TextIO, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> None: + """Write a Python dict as HCL2 to a file. -def load(file: TextIO, with_meta=False) -> dict: - """Load a HCL2 file. - :param file: File with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param data: Python dict (as produced by :func:`load`). + :param file: Writable text file. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. """ - return loads(file.read(), with_meta=with_meta) + file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) -def loads(text: str, with_meta=False) -> dict: - """Load HCL2 from a string. - :param text: Text with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def dumps( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> str: + """Convert a Python dict to an HCL2 string. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. 
+ :param formatter_options: Options controlling formatting behavior. """ - # append new line as a workaround for https://github.com/lark-parser/lark/issues/237 + tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + return reconstruct(tree) + + +# --------------------------------------------------------------------------- +# Parsing: HCL text -> LarkElement tree or raw Lark tree +# --------------------------------------------------------------------------- + + +def parse(file: TextIO, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 file into a LarkElement tree. + + :param file: File with HCL2 content. + :param discard_comments: If True, discard comments during transformation. + """ + return parses(file.read(), discard_comments=discard_comments) + + +def parses(text: str, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 string into a LarkElement tree. + + :param text: HCL2 text. + :param discard_comments: If True, discard comments during transformation. + """ + lark_tree = parses_to_tree(text) + return transform(lark_tree, discard_comments=discard_comments) + + +def parse_to_tree(file: TextIO) -> Tree: + """Parse a HCL2 file into a raw Lark parse tree. + + :param file: File with HCL2 content. + """ + return parses_to_tree(file.read()) + + +def parses_to_tree(text: str) -> Tree: + """Parse a HCL2 string into a raw Lark parse tree. + + :param text: HCL2 text. + """ + # Append newline as workaround for https://github.com/lark-parser/lark/issues/237 # Lark doesn't support EOF token so our grammar can't look for "new line or end of file" - # This means that all blocks must end in a new line even if the file ends - # Append a new line as a temporary fix - tree = parser().parse(text + "\n") - return RuleTransformer().transform(tree) + return _get_parser().parse(text + "\n") -def parse(file: TextIO) -> Tree: - """Load HCL2 syntax tree from a file. 
- :param file: File with hcl2 to be loaded as a dict. +# --------------------------------------------------------------------------- +# Intermediate pipeline stages +# --------------------------------------------------------------------------- + + +def from_dict( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a Python dict into a LarkElement tree. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. + """ + deserializer = BaseDeserializer(deserializer_options) + tree = deserializer.load_python(data) + if format: + formatter = BaseFormatter(formatter_options) + formatter.format_tree(tree) + return tree + + +def from_json( + text: str, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a JSON string into a LarkElement tree. + + :param text: JSON string. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. """ - return parses(file.read()) + data = _json.loads(text) + return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + +def reconstruct(tree) -> str: + """Convert a LarkElement tree (or raw Lark tree) to an HCL2 string. -def parses(text: str) -> Tree: - """Load HCL2 syntax tree from a string. - :param text: Text with hcl2 to be loaded as a dict. + :param tree: A :class:`StartRule` (LarkElement tree) or :class:`lark.Tree`. 
""" - return parser().parse(text) + reconstructor = HCLReconstructor() + if isinstance(tree, StartRule): + tree = tree.to_lark() + return reconstructor.reconstruct(tree) -def transform(ast: Tree, with_meta=False) -> dict: - """Convert an HCL2 AST to a dictionary. - :param ast: HCL2 syntax tree, output from `parse` or `parses` - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: + """Transform a raw Lark parse tree into a LarkElement tree. + + :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parses_to_tree`. + :param discard_comments: If True, discard comments during transformation. """ - return RuleTransformer().transform(ast) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + +def serialize( + tree: StartRule, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Serialize a LarkElement tree to a Python dict. -def writes(ast: Tree) -> str: - """Convert an HCL2 syntax tree to a string. - :param ast: HCL2 syntax tree, output from `parse` or `parses` + :param tree: A :class:`StartRule` (LarkElement tree). + :param serialization_options: Options controlling serialization behavior. """ - return HCLReconstructor().reconstruct(ast) + if serialization_options is not None: + return tree.serialize(options=serialization_options) + return tree.serialize() diff --git a/hcl2/builder.py b/hcl2/builder.py index b5b149da..5ef0c416 100644 --- a/hcl2/builder.py +++ b/hcl2/builder.py @@ -3,18 +3,16 @@ from collections import defaultdict -from hcl2.const import START_LINE_KEY, END_LINE_KEY +from hcl2.const import IS_BLOCK class Builder: """ The `hcl2.Builder` class produces a dictionary that should be identical to the - output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword - argument is important here.
HCL "blocks" in the Python dictionary are - identified by the presence of `__start_line__` and `__end_line__` metadata - within them. The `Builder` class handles adding that metadata. If that metadata - is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to - identify what is a block and what is just an attribute with an object value. + output of `hcl2.load(example_file)`. HCL "blocks" in the Python dictionary are + identified by the presence of `__is_block__: True` markers within them. + The `Builder` class handles adding that marker. If that marker is missing, + the deserializer fails to distinguish blocks from regular object attributes. """ def __init__(self, attributes: Optional[dict] = None): @@ -49,8 +47,7 @@ def build(self): body.update( { - START_LINE_KEY: -1, - END_LINE_KEY: -1, + IS_BLOCK: True, **self.attributes, } ) @@ -79,7 +76,7 @@ def _add_nested_blocks( """Add nested blocks defined within another `Builder` instance to the `block` dictionary""" nested_block = nested_blocks_builder.build() for key, value in nested_block.items(): - if key not in (START_LINE_KEY, END_LINE_KEY): + if key != IS_BLOCK: if key not in block.keys(): block[key] = [] block[key].extend(value) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index d6b4d4c2..0ca91b48 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -6,7 +6,7 @@ from regex import regex -from hcl2 import parses +from hcl2.parser import parser as _get_parser from hcl2.const import IS_BLOCK from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( @@ -217,7 +217,7 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # create HCL2 snippet value = f"temp = {value}" # parse the above - parsed_tree = parses(value) + parsed_tree = _get_parser().parse(value) # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree diff --git a/test/unit/test_api.py 
b/test/unit/test_api.py new file mode 100644 index 00000000..a87d9e32 --- /dev/null +++ b/test/unit/test_api.py @@ -0,0 +1,244 @@ +from io import StringIO +from unittest import TestCase + +from hcl2.api import ( + load, + loads, + dump, + dumps, + parse, + parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, + transform, + serialize, +) +from hcl2.rules.base import StartRule +from hcl2.utils import SerializationOptions +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from lark.tree import Tree + + +SIMPLE_HCL = 'x = 5\n' +SIMPLE_DICT = {"x": 5} + +BLOCK_HCL = 'resource "aws_instance" "example" {\n ami = "abc-123"\n}\n' + + +class TestLoads(TestCase): + + def test_simple_attribute(self): + result = loads(SIMPLE_HCL) + self.assertEqual(result["x"], 5) + + def test_returns_dict(self): + result = loads(SIMPLE_HCL) + self.assertIsInstance(result, dict) + + def test_with_serialization_options(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_meta_option(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_meta=True)) + self.assertIn("x", result) + + def test_block_parsing(self): + result = loads(BLOCK_HCL) + self.assertIn("resource", result) + + +class TestLoad(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = load(f) + self.assertEqual(result["x"], 5) + + def test_with_serialization_options(self): + f = StringIO(SIMPLE_HCL) + result = load(f, serialization_options=SerializationOptions(with_comments=False)) + self.assertEqual(result["x"], 5) + + +class TestDumps(TestCase): + + def test_simple_attribute(self): + result = dumps(SIMPLE_DICT) + self.assertIsInstance(result, str) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_dumps_contains_key_and_value(self): + result = 
dumps(SIMPLE_DICT) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_with_deserializer_options(self): + result = dumps(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, str) + + def test_with_formatter_options(self): + result = dumps(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, str) + + +class TestDump(TestCase): + + def test_writes_to_file(self): + f = StringIO() + dump(SIMPLE_DICT, f) + output = f.getvalue() + self.assertIn("x", output) + self.assertIn("5", output) + + +class TestParsesToTree(TestCase): + + def test_returns_lark_tree(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertIsInstance(result, Tree) + + def test_tree_has_start_rule(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertEqual(result.data, "start") + + +class TestParseToTree(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse_to_tree(f) + self.assertIsInstance(result, Tree) + + +class TestParses(TestCase): + + def test_returns_start_rule(self): + result = parses(SIMPLE_HCL) + self.assertIsInstance(result, StartRule) + + def test_discard_comments_false(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=False) + serialized = serialize(result) + self.assertIn("__comments__", serialized) + + def test_discard_comments_true(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestParse(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse(f) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + f = StringIO('# comment\nx = 5\n') + result = parse(f, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestTransform(TestCase): + + def test_transforms_lark_tree(self): + lark_tree = 
parses_to_tree(SIMPLE_HCL) + result = transform(lark_tree) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + lark_tree = parses_to_tree('# comment\nx = 5\n') + result = transform(lark_tree, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestSerialize(TestCase): + + def test_returns_dict(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_options(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + + def test_none_options_uses_defaults(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=None) + self.assertEqual(result["x"], 5) + + +class TestFromDict(TestCase): + + def test_returns_start_rule(self): + result = from_dict(SIMPLE_DICT) + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_dict(SIMPLE_DICT) + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_dict(SIMPLE_DICT, format=False) + self.assertIsInstance(result, StartRule) + + def test_with_deserializer_options(self): + result = from_dict(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, StartRule) + + def test_with_formatter_options(self): + result = from_dict(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, StartRule) + + +class TestFromJson(TestCase): + + def test_returns_start_rule(self): + result = from_json('{"x": 5}') + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_json('{"x": 5}') + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_json('{"x": 5}', format=False) + self.assertIsInstance(result, 
StartRule) + + +class TestReconstruct(TestCase): + + def test_from_start_rule(self): + tree = parses(SIMPLE_HCL) + result = reconstruct(tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_from_lark_tree(self): + lark_tree = parses_to_tree(SIMPLE_HCL) + result = reconstruct(lark_tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_roundtrip(self): + tree = parses(SIMPLE_HCL) + hcl_text = reconstruct(tree) + reparsed = loads(hcl_text) + self.assertEqual(reparsed["x"], 5) diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py new file mode 100644 index 00000000..5d411c64 --- /dev/null +++ b/test/unit/test_builder.py @@ -0,0 +1,157 @@ +from unittest import TestCase + +from hcl2.builder import Builder +from hcl2.const import IS_BLOCK + + +class TestBuilderAttributes(TestCase): + + def test_empty_builder(self): + b = Builder() + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + + def test_with_attributes(self): + b = Builder({"key": "value", "count": 3}) + result = b.build() + self.assertEqual(result["key"], "value") + self.assertEqual(result["count"], 3) + + def test_is_block_marker_present(self): + b = Builder({"x": 1}) + result = b.build() + self.assertTrue(result[IS_BLOCK]) + + +class TestBuilderBlock(TestCase): + + def test_simple_block(self): + b = Builder() + b.block("resource") + result = b.build() + self.assertIn("resource", result) + self.assertEqual(len(result["resource"]), 1) + + def test_block_with_labels(self): + b = Builder() + b.block("resource", labels=["aws_instance", "example"]) + result = b.build() + block_entry = result["resource"][0] + self.assertIn("aws_instance", block_entry) + inner = block_entry["aws_instance"] + self.assertIn("example", inner) + + def test_block_with_attributes(self): + b = Builder() + b.block("resource", labels=["type"], ami="abc-123") + result = b.build() + block = result["resource"][0]["type"] + 
self.assertEqual(block["ami"], "abc-123") + + def test_multiple_blocks_same_type(self): + b = Builder() + b.block("resource", labels=["type_a"]) + b.block("resource", labels=["type_b"]) + result = b.build() + self.assertEqual(len(result["resource"]), 2) + + def test_multiple_block_types(self): + b = Builder() + b.block("resource") + b.block("data") + result = b.build() + self.assertIn("resource", result) + self.assertIn("data", result) + + def test_block_returns_builder(self): + b = Builder() + child = b.block("resource") + self.assertIsInstance(child, Builder) + + def test_block_child_attributes(self): + b = Builder() + child = b.block("resource", labels=["type"]) + child.attributes["nested_key"] = "nested_val" + # Rebuild to pick up the changes + result = b.build() + block = result["resource"][0]["type"] + self.assertEqual(block["nested_key"], "nested_val") + + def test_self_reference_raises(self): + b = Builder() + with self.assertRaises(ValueError): + b.block("resource", __nested_builder__=b) + + +class TestBuilderNestedBlocks(TestCase): + + def test_nested_builder(self): + b = Builder() + inner = Builder() + inner.block("provisioner", labels=["local-exec"], command="echo hello") + b.block("resource", labels=["type"], __nested_builder__=inner) + result = b.build() + block = result["resource"][0]["type"] + self.assertIn("provisioner", block) + + def test_nested_blocks_merged(self): + b = Builder() + inner = Builder() + inner.block("sub_block", x=1) + inner.block("sub_block", x=2) + b.block("resource", __nested_builder__=inner) + result = b.build() + block = result["resource"][0] + self.assertEqual(len(block["sub_block"]), 2) + + +class TestBuilderBlockMarker(TestCase): + + def test_block_marker_is_is_block(self): + """Verify IS_BLOCK marker is used (not __start_line__/__end_line__).""" + b = Builder({"x": 1}) + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + self.assertNotIn("__start_line__", result) + 
self.assertNotIn("__end_line__", result) + + def test_nested_blocks_skip_is_block_key(self): + """_add_nested_blocks should skip IS_BLOCK when merging.""" + b = Builder() + inner = Builder() + inner.block("sub", val=1) + b.block("parent", __nested_builder__=inner) + result = b.build() + parent_block = result["parent"][0] + # sub blocks should be present, but IS_BLOCK from inner should not leak as a list + self.assertIn("sub", parent_block) + # IS_BLOCK should be a bool marker, not a list + self.assertTrue(parent_block[IS_BLOCK]) + + +class TestBuilderIntegration(TestCase): + + def test_full_document(self): + doc = Builder() + doc.block( + "resource", + labels=["aws_instance", "web"], + ami="ami-12345", + instance_type="t2.micro", + ) + doc.block( + "resource", + labels=["aws_s3_bucket", "data"], + bucket="my-bucket", + ) + result = doc.build() + self.assertEqual(len(result["resource"]), 2) + + web = result["resource"][0]["aws_instance"]["web"] + self.assertEqual(web["ami"], "ami-12345") + self.assertEqual(web["instance_type"], "t2.micro") + + data = result["resource"][1]["aws_s3_bucket"]["data"] + self.assertEqual(data["bucket"], "my-bucket") From c05273d26e1c751266f1c924a9a96f12ac5fcdc9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:58 +0100 Subject: [PATCH 22/24] reorganize "round-trip" tests into integration tests --- test/{round_trip => integration}/__init__.py | 0 .../hcl2_original/operators.tf} | 0 .../hcl2_original/smoke.tf | 0 .../hcl2_reconstructed/operators.tf} | 0 .../hcl2_reconstructed/smoke.tf | 0 .../json_reserialized/operators.json} | 0 .../json_reserialized/smoke.json | 0 .../json_serialized/operators.json} | 0 .../json_serialized/smoke.json | 0 .../specialized/builder_basic.json | 63 +++++++++++++++ test/integration/specialized/builder_basic.tf | 38 +++++++++ .../specialized/builder_basic_reparsed.json | 64 +++++++++++++++ .../builder_basic_reserialized.json | 62 +++++++++++++++ .../specialized}/operator_precedence.json | 0 
.../specialized/operator_precedence.tf | 15 ++++ .../test_round_trip.py | 48 +++--------- test/integration/test_specialized.py | 77 +++++++++++++++++++ 17 files changed, 331 insertions(+), 36 deletions(-) rename test/{round_trip => integration}/__init__.py (100%) rename test/{round_trip/hcl2_original/operator_precedence.tf => integration/hcl2_original/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_original/smoke.tf (100%) rename test/{round_trip/hcl2_reconstructed/operator_precedence.tf => integration/hcl2_reconstructed/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_reconstructed/smoke.tf (100%) rename test/{round_trip/json_reserialized/operator_precedence.json => integration/json_reserialized/operators.json} (100%) rename test/{round_trip => integration}/json_reserialized/smoke.json (100%) rename test/{round_trip/json_serialized/operator_precedence.json => integration/json_serialized/operators.json} (100%) rename test/{round_trip => integration}/json_serialized/smoke.json (100%) create mode 100644 test/integration/specialized/builder_basic.json create mode 100644 test/integration/specialized/builder_basic.tf create mode 100644 test/integration/specialized/builder_basic_reparsed.json create mode 100644 test/integration/specialized/builder_basic_reserialized.json rename test/{round_trip/special => integration/specialized}/operator_precedence.json (100%) create mode 100644 test/integration/specialized/operator_precedence.tf rename test/{round_trip => integration}/test_round_trip.py (78%) create mode 100644 test/integration/test_specialized.py diff --git a/test/round_trip/__init__.py b/test/integration/__init__.py similarity index 100% rename from test/round_trip/__init__.py rename to test/integration/__init__.py diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/integration/hcl2_original/operators.tf similarity index 100% rename from test/round_trip/hcl2_original/operator_precedence.tf rename to 
test/integration/hcl2_original/operators.tf diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf similarity index 100% rename from test/round_trip/hcl2_original/smoke.tf rename to test/integration/hcl2_original/smoke.tf diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/integration/hcl2_reconstructed/operators.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/operator_precedence.tf rename to test/integration/hcl2_reconstructed/operators.tf diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/smoke.tf rename to test/integration/hcl2_reconstructed/smoke.tf diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/integration/json_reserialized/operators.json similarity index 100% rename from test/round_trip/json_reserialized/operator_precedence.json rename to test/integration/json_reserialized/operators.json diff --git a/test/round_trip/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json similarity index 100% rename from test/round_trip/json_reserialized/smoke.json rename to test/integration/json_reserialized/smoke.json diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/integration/json_serialized/operators.json similarity index 100% rename from test/round_trip/json_serialized/operator_precedence.json rename to test/integration/json_serialized/operators.json diff --git a/test/round_trip/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json similarity index 100% rename from test/round_trip/json_serialized/smoke.json rename to test/integration/json_serialized/smoke.json diff --git a/test/integration/specialized/builder_basic.json b/test/integration/specialized/builder_basic.json new file mode 100644 index 00000000..da62720b --- /dev/null +++ 
b/test/integration/specialized/builder_basic.json @@ -0,0 +1,63 @@ +{ + "__is_block__": true, + "resource": [ + { + "aws_instance": { + "web": { + "__is_block__": true, + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2 + } + } + }, + { + "aws_s3_bucket": { + "data": { + "__is_block__": true, + "bucket": "\"my-bucket\"", + "acl": "\"private\"" + } + } + }, + { + "aws_instance": { + "nested": { + "__is_block__": true, + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "__is_block__": true, + "command": "\"echo hello\"" + } + }, + { + "remote-exec": { + "__is_block__": true, + "inline": "[\"puppet apply\"]" + } + } + ] + } + } + } + ], + "variable": [ + { + "instance_type": { + "__is_block__": true, + "default": "\"t2.micro\"", + "description": "\"The instance type\"" + } + } + ], + "locals": [ + { + "__is_block__": true, + "port": 8080, + "enabled": true, + "name": "\"my-app\"" + } + ] +} diff --git a/test/integration/specialized/builder_basic.tf b/test/integration/specialized/builder_basic.tf new file mode 100644 index 00000000..b7ee2131 --- /dev/null +++ b/test/integration/specialized/builder_basic.tf @@ -0,0 +1,38 @@ +resource aws_instance web { + ami = "ami-12345" + instance_type = "t2.micro" + count = 2 +} + + +resource aws_s3_bucket data { + bucket = "my-bucket" + acl = "private" +} + + +resource aws_instance nested { + ami = "ami-99999" + + provisioner local-exec { + command = "echo hello" + } + + + provisioner remote-exec { + inline = ["puppet apply"] + } +} + + +variable instance_type { + default = "t2.micro" + description = "The instance type" +} + + +locals { + port = 8080 + enabled = true + name = "my-app" +} diff --git a/test/integration/specialized/builder_basic_reparsed.json b/test/integration/specialized/builder_basic_reparsed.json new file mode 100644 index 00000000..32e4954d --- /dev/null +++ b/test/integration/specialized/builder_basic_reparsed.json @@ -0,0 +1,64 @@ +{ + "resource": [ + { + "aws_instance": 
{ + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": [ + "\"puppet apply\"" + ], + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/builder_basic_reserialized.json b/test/integration/specialized/builder_basic_reserialized.json new file mode 100644 index 00000000..364ef0c3 --- /dev/null +++ b/test/integration/specialized/builder_basic_reserialized.json @@ -0,0 +1,62 @@ +{ + "resource": [ + { + "aws_instance": { + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": "[\"puppet apply\"]", + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git 
a/test/round_trip/special/operator_precedence.json b/test/integration/specialized/operator_precedence.json similarity index 100% rename from test/round_trip/special/operator_precedence.json rename to test/integration/specialized/operator_precedence.json diff --git a/test/integration/specialized/operator_precedence.tf b/test/integration/specialized/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/integration/specialized/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/test_round_trip.py b/test/integration/test_round_trip.py similarity index 78% rename from test/round_trip/test_round_trip.py rename to test/integration/test_round_trip.py index 93fcd111..3d2bbbb0 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -1,7 +1,7 @@ """Round-trip tests for the HCL2 → JSON → HCL2 pipeline. -Every test starts from the source HCL files in test/round_trip/hcl2/ and -runs the pipeline forward from there, comparing actuals against expected +Every test starts from the source HCL files in test/integration/hcl2_original/ +and runs the pipeline forward from there, comparing actuals against expected outputs at each stage: 1. 
HCL → JSON serialization (parse + transform + serialize) @@ -16,23 +16,20 @@ from typing import List from unittest import TestCase -from hcl2 import parses +from hcl2.api import parses_to_tree from hcl2.deserializer import BaseDeserializer from hcl2.formatter import BaseFormatter from hcl2.reconstructor import HCLReconstructor from hcl2.transformer import RuleTransformer -from hcl2.utils import SerializationOptions -ROUND_TRIP_DIR = Path(__file__).absolute().parent -HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" - -SPECIAL_DIR = ROUND_TRIP_DIR / "special" +INTEGRATION_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = INTEGRATION_DIR / "hcl2_original" _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, - "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", - "json_serialized": ROUND_TRIP_DIR / "json_serialized", - "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "hcl2_reconstructed": INTEGRATION_DIR / "hcl2_reconstructed", + "json_serialized": INTEGRATION_DIR / "json_serialized", + "json_reserialized": INTEGRATION_DIR / "json_reserialized", } _STEP_SUFFIXES = { @@ -53,7 +50,7 @@ class SuiteStep(Enum): def _get_suites() -> List[str]: """ Get a list of the test suites. - Names of a test suite is a name of file in `test/round_trip/hcl2_original/` without the .tf suffix. + Names of a test suite is a name of file in `test/integration/hcl2_original/` without the .tf suffix. Override SUITES to run a specific subset, e.g. SUITES = ["config"] """ @@ -63,7 +60,7 @@ def _get_suites() -> List[str]: # set this to arbitrary list of test suites to run, -# e.g. `SUITES = ["smoke"]` to run the tests only for `test/round_trip/hcl2_original/smoke.tf` +# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/integration/hcl2_original/smoke.tf` SUITES: List[str] = [] @@ -72,9 +69,9 @@ def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) -def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: +def _parse_and_serialize(hcl_text: str, options=None) -> dict: """Parse HCL text and serialize to a Python dict.""" - parsed_tree = parses(hcl_text) + parsed_tree = parses_to_tree(hcl_text) rules = RuleTransformer().transform(parsed_tree) if options: return rules.serialize(options=options) @@ -192,24 +189,3 @@ def test_full_round_trip(self): f"Full round-trip mismatch for {suite}: " f"HCL → JSON → HCL → JSON did not produce identical JSON", ) - - -class TestOperatorPrecedence(TestCase): - """Test that parsed expressions correctly represent operator precedence. - - Serializes with force_operation_parentheses=True so that implicit - precedence becomes explicit parentheses in the output. - See: https://github.com/amplify-education/python-hcl2/issues/248 - """ - - maxDiff = None - _OPTIONS = SerializationOptions(force_operation_parentheses=True) - - def test_operator_precedence(self): - hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) - json_path = SPECIAL_DIR / "operator_precedence.json" - - actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) - expected = json.loads(json_path.read_text()) - - self.assertEqual(actual, expected) diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py new file mode 100644 index 00000000..d1b817e2 --- /dev/null +++ b/test/integration/test_specialized.py @@ -0,0 +1,77 @@ +"""Specialized integration tests for specific features and scenarios. 
+ +Unlike the suite-based round-trip tests, these target individual features +(operator precedence, Builder round-trip) with dedicated golden files +in test/integration/specialized/. +""" + +import json +from pathlib import Path +from unittest import TestCase + +from hcl2.utils import SerializationOptions + +from test.integration.test_round_trip import ( + _parse_and_serialize, + _deserialize_and_reserialize, + _deserialize_and_reconstruct, +) + +SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = SPECIAL_DIR / "operator_precedence.tf" + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + +class TestBuilderRoundTrip(TestCase): + """Test that dicts produced by Builder can be deserialized, reconstructed to + valid HCL, and reparsed back to equivalent dicts. 
+ + Pipeline: Builder.build() → from_dict → reconstruct → HCL text + HCL text → parse → serialize → dict (compare with expected) + """ + + maxDiff = None + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_builder_reconstruction(self): + """Builder dict → deserialize → reconstruct → compare with expected HCL.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + actual_hcl = _deserialize_and_reconstruct(builder_dict) + expected_hcl = self._load_special("builder_basic", ".tf") + self.assertMultiLineEqual(actual_hcl, expected_hcl) + + def test_builder_full_round_trip(self): + """Builder dict → reconstruct → reparse → compare with expected JSON.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reconstructed_hcl = _deserialize_and_reconstruct(builder_dict) + actual = _parse_and_serialize(reconstructed_hcl) + expected = json.loads(self._load_special("builder_basic_reparsed", ".json")) + self.assertEqual(actual, expected) + + def test_builder_reserialization(self): + """Builder dict → deserialize → reserialize → compare with expected dict.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reserialized = _deserialize_and_reserialize(builder_dict) + expected = json.loads(self._load_special("builder_basic_reserialized", ".json")) + self.assertEqual(reserialized, expected) From cf33fb3a05cd67c09607904f2f5ba798e6c1e2e2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:17:55 +0100 Subject: [PATCH 23/24] increase coverage failure threshold --- .coveragerc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 30e6dc8c..3907df05 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,8 +5,8 @@ omit = hcl2/lark_parser.py hcl2/version.py hcl2/__init__.py - hcl2/rules/__init__.py + hcl2/rules/__init__.py [report] show_missing = true -fail_under = 80 +fail_under = 90 From 
020d141cbb7619c32ebd46b7b30d8ed26c813aed Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 13:51:12 +0100 Subject: [PATCH 24/24] migrate some of existing round-trip tests to the new style, fix some related bugs --- hcl2/deserializer.py | 15 ++++++--- hcl2/hcl2.lark | 3 +- hcl2/rules/containers.py | 6 +++- hcl2/transformer.py | 3 ++ .../hcl2_original/floats.tf} | 0 .../hcl2_original}/nulls.tf | 0 test/integration/hcl2_original/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_original/smoke.tf | 11 ------- .../hcl2_original}/string_interpolations.tf | 6 ++-- .../hcl2_original}/unicode_strings.tf | 0 test/integration/hcl2_reconstructed/floats.tf | 26 ++++++++++++++++ test/integration/hcl2_reconstructed/nulls.tf | 11 +++++++ .../hcl2_reconstructed/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_reconstructed/smoke.tf | 11 ------- .../string_interpolations.tf | 9 ++++++ .../hcl2_reconstructed/unicode_strings.tf | 21 +++++++++++++ .../integration/json_reserialized/floats.json | 31 +++++++++++++++++++ test/integration/json_reserialized/nulls.json | 13 ++++++++ .../json_reserialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_reserialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_reserialized/unicode_strings.json | 21 +++++++++++++ test/integration/json_serialized/floats.json | 31 +++++++++++++++++++ test/integration/json_serialized/nulls.json | 13 ++++++++ .../json_serialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_serialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_serialized/unicode_strings.json | 21 +++++++++++++ test/unit/rules/test_containers.py | 4 +-- 33 files changed, 344 insertions(+), 62 deletions(-) rename test/{helpers/terraform-config/test_floats.tf => 
integration/hcl2_original/floats.tf} (100%) rename test/{helpers/terraform-config => integration/hcl2_original}/nulls.tf (100%) create mode 100644 test/integration/hcl2_original/object_keys.tf create mode 100644 test/integration/hcl2_original/resource_keyword_attribute.tf rename test/{helpers/terraform-config => integration/hcl2_original}/string_interpolations.tf (68%) rename test/{helpers/terraform-config => integration/hcl2_original}/unicode_strings.tf (100%) create mode 100644 test/integration/hcl2_reconstructed/floats.tf create mode 100644 test/integration/hcl2_reconstructed/nulls.tf create mode 100644 test/integration/hcl2_reconstructed/object_keys.tf create mode 100644 test/integration/hcl2_reconstructed/resource_keyword_attribute.tf create mode 100644 test/integration/hcl2_reconstructed/string_interpolations.tf create mode 100644 test/integration/hcl2_reconstructed/unicode_strings.tf create mode 100644 test/integration/json_reserialized/floats.json create mode 100644 test/integration/json_reserialized/nulls.json create mode 100644 test/integration/json_reserialized/object_keys.json create mode 100644 test/integration/json_reserialized/resource_keyword_attribute.json create mode 100644 test/integration/json_reserialized/string_interpolations.json create mode 100644 test/integration/json_reserialized/unicode_strings.json create mode 100644 test/integration/json_serialized/floats.json create mode 100644 test/integration/json_serialized/nulls.json create mode 100644 test/integration/json_serialized/object_keys.json create mode 100644 test/integration/json_serialized/resource_keyword_attribute.json create mode 100644 test/integration/json_serialized/string_interpolations.json create mode 100644 test/integration/json_serialized/unicode_strings.json diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 0ca91b48..a1f9733e 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -88,7 +88,12 @@ def _transformer(self) -> RuleTransformer: return 
RuleTransformer() def load_python(self, value: Any) -> LarkElement: - result = StartRule([self._deserialize(value)]) + if isinstance(value, dict): + # Top-level dict is always a body (attributes + blocks), not an object + children = self._deserialize_block_elements(value) + result = StartRule([BodyRule(children)]) + else: + result = StartRule([self._deserialize(value)]) return result def loads(self, value: str) -> LarkElement: @@ -286,7 +291,7 @@ def _deserialize_object(self, value: dict) -> ObjectRule: return ObjectRule([LBRACE(), *children, RBRACE()]) - def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: if self._is_expression(key): key = ObjectElemKeyExpressionRule( [ @@ -295,7 +300,7 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if child is not None ] ) - elif "." in key: + elif isinstance(key, str) and "." in key: parts = key.split(".") children = [] for part in parts: @@ -313,8 +318,8 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: return ObjectElemRule(result) - def _is_expression(self, value: str) -> bool: - return value.startswith("${") and value.endswith("}") + def _is_expression(self, value: Any) -> bool: + return isinstance(value, str) and value.startswith("${") and value.endswith("}") def _is_block(self, value: Any) -> bool: """Simple check: if it's a list containing dicts with IS_BLOCK markers""" diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 63154efb..4a9f1ec6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -81,7 +81,8 @@ start : body // Body and basic constructs body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression +attribute : _attribute_name EQ expression +_attribute_name : identifier | keyword block : identifier (identifier | string)* new_line_or_comment? 
LBRACE body RBRACE // Whitespace and comments diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 4d7310c8..3f590c5c 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -96,7 +96,11 @@ def value(self) -> key_T: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return self.value.serialize(options, context) + result = self.value.serialize(options, context) + # Object keys must be strings for JSON compatibility + if isinstance(result, (int, float)): + result = str(result) + return result class ObjectElemKeyExpressionRule(LarkRule): diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 07230fe5..7de4f7e1 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -108,6 +108,9 @@ def block(self, meta: Meta, args) -> BlockRule: @v_args(meta=True) def attribute(self, meta: Meta, args) -> AttributeRule: + # _attribute_name is flattened, so args[0] may be KeywordRule or IdentifierRule + if isinstance(args[0], KeywordRule): + args[0] = IdentifierRule([NAME(args[0].token.value)], meta) return AttributeRule(args, meta) @v_args(meta=True) diff --git a/test/helpers/terraform-config/test_floats.tf b/test/integration/hcl2_original/floats.tf similarity index 100% rename from test/helpers/terraform-config/test_floats.tf rename to test/integration/hcl2_original/floats.tf diff --git a/test/helpers/terraform-config/nulls.tf b/test/integration/hcl2_original/nulls.tf similarity index 100% rename from test/helpers/terraform-config/nulls.tf rename to test/integration/hcl2_original/nulls.tf diff --git a/test/integration/hcl2_original/object_keys.tf b/test/integration/hcl2_original/object_keys.tf new file mode 100644 index 00000000..913d5a42 --- /dev/null +++ b/test/integration/hcl2_original/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0: 0, + "foo": 1 + baz : 2, + (var.account) : 3 + (format("key_prefix_%s", local.foo)) : 4 + "prefix_${var.account}:${var.user}_suffix": 5, +} diff --git 
a/test/integration/hcl2_original/resource_keyword_attribute.tf b/test/integration/hcl2_original/resource_keyword_attribute.tf new file mode 100644 index 00000000..fca27d75 --- /dev/null +++ b/test/integration/hcl2_original/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource "custom_provider_resource" "resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index d741a6ac..99537532 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -43,17 +43,6 @@ block label1 label2 { } } -block label1 label3 { - simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/integration/hcl2_original/string_interpolations.tf similarity index 68% rename from test/helpers/terraform-config/string_interpolations.tf rename to test/integration/hcl2_original/string_interpolations.tf index 582b4aac..f9ac4e18 100644 --- a/test/helpers/terraform-config/string_interpolations.tf +++ b/test/integration/hcl2_original/string_interpolations.tf @@ -1,6 +1,6 @@ -locals { - simple_interpolation = "prefix:${var.foo}-suffix" - embedded_interpolation = "(long substring without interpolation); 
${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo" +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" escaped_interpolation = "prefix:$${aws:username}-suffix" simple_and_escaped = "${"bar"}$${baz:bat}" diff --git a/test/helpers/terraform-config/unicode_strings.tf b/test/integration/hcl2_original/unicode_strings.tf similarity index 100% rename from test/helpers/terraform-config/unicode_strings.tf rename to test/integration/hcl2_original/unicode_strings.tf diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf new file mode 100644 index 00000000..810108b2 --- /dev/null +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -0,0 +1,26 @@ +locals { + simple_float = 123.456 + small_float = 0.123 + large_float = 9876543.21 + negative_float = -42.5 + negative_small = -0.001 + scientific_positive = 123000.0 + scientific_negative = 0.00987 + scientific_large = 6.022e+23 + integer_as_float = 100.0 + float_calculation = 10500.0 * 3.0 / 2.1 + float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_list = [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0, + ] + float_object = { + pi = 3.14159, + euler = 2.71828, + sqrt2 = 1.41421, + scientific = -12300.0, + } +} diff --git a/test/integration/hcl2_reconstructed/nulls.tf b/test/integration/hcl2_reconstructed/nulls.tf new file mode 100644 index 00000000..1e487789 --- /dev/null +++ b/test/integration/hcl2_reconstructed/nulls.tf @@ -0,0 +1,11 @@ +terraform = { + unary = !null, + binary = (a == null), + tuple = [ + null, + 1, + 2, + ], + single = null, + conditional = null ? 
null : null, +} diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf new file mode 100644 index 00000000..497e65a6 --- /dev/null +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0 = 0, + "foo" = 1, + baz = 2, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, + "prefix_${var.account}:${var.user}_suffix" = 5, +} diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf new file mode 100644 index 00000000..498777e0 --- /dev/null +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource"custom_provider_resource""resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 8f17d6d6..b2de26f3 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -39,17 +39,6 @@ block label1 label2 { } -block label1 label3 { - simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/hcl2_reconstructed/string_interpolations.tf b/test/integration/hcl2_reconstructed/string_interpolations.tf new file mode 100644 index 
00000000..73df4715 --- /dev/null +++ b/test/integration/hcl2_reconstructed/string_interpolations.tf @@ -0,0 +1,9 @@ +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} diff --git a/test/integration/hcl2_reconstructed/unicode_strings.tf b/test/integration/hcl2_reconstructed/unicode_strings.tf new file mode 100644 index 00000000..8c4df70e --- /dev/null +++ b/test/integration/hcl2_reconstructed/unicode_strings.tf @@ -0,0 +1,21 @@ +locals { + basic_unicode = "Hello, 世界! こんにちは Привет नमस्ते" + unicode_escapes = "© ♥ ♪ ☠ ☺" + emoji_string = "🚀 🌍 🔥 🎉" + rtl_text = "English and العربية text mixed" + complex_unicode = "Python (파이썬) es 很棒的! 
♥ αβγδ" + ascii = "ASCII: abc123" + emoji = "Emoji: 🚀🌍🔥🎉" + math = "Math: ∑∫√∞≠≤≥" + currency = "Currency: £€¥₹₽₩" + arrows = "Arrows: ←↑→↓↔↕" + cjk = "CJK: 你好世界안녕하세요こんにちは" + cyrillic = "Cyrillic: Привет мир" + special = "Special: ©®™§¶†‡" + mixed_content = <<-EOT + Line with interpolation: ${var.name} + Line with emoji: 👨‍👩‍👧‍👦 + Line with quotes: "quoted text" + Line with backslash: \escaped + EOT +} diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_reserialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/nulls.json b/test/integration/json_reserialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_reserialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? 
null : null}" + } +} diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_reserialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_reserialized/resource_keyword_attribute.json b/test/integration/json_reserialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_reserialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": "\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in 
keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_reserialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_reserialized/unicode_strings.json b/test/integration/json_reserialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- /dev/null +++ b/test/integration/json_reserialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! 
\u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_serialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 
1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/nulls.json b/test/integration/json_serialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_serialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? null : null}" + } +} diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_serialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_serialized/resource_keyword_attribute.json b/test/integration/json_serialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_serialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": 
"\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_serialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_serialized/unicode_strings.json b/test/integration/json_serialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- 
/dev/null +++ b/test/integration/json_serialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! \u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index b49b3f38..5ae28df4 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -171,11 +171,11 @@ def test_serialize_identifier(self): def test_serialize_int_lit(self): rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) - self.assertEqual(rule.serialize(), 5) + self.assertEqual(rule.serialize(), "5") def test_serialize_float_lit(self): rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) - 
self.assertAlmostEqual(rule.serialize(), 3.14) + self.assertEqual(rule.serialize(), "3.14") def test_serialize_string(self): rule = ObjectElemKeyRule([_make_string_rule("k3")])