diff --git a/.coveragerc b/.coveragerc index 4facabdc..3907df05 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,7 +3,10 @@ branch = true omit = hcl2/__main__.py hcl2/lark_parser.py + hcl2/version.py + hcl2/__init__.py + hcl2/rules/__init__.py [report] show_missing = true -fail_under = 80 +fail_under = 90 diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 62f5a198..d3a9ea7b 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -8,11 +8,21 @@ from .api import ( load, loads, + dump, + dumps, parse, parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, transform, - reverse_transform, - writes, + serialize, ) from .builder import Builder +from .deserializer import DeserializerOptions +from .formatter import FormatterOptions +from .rules.base import StartRule +from .utils import SerializationOptions diff --git a/hcl2/__main__.py b/hcl2/__main__.py index 17a021e1..f1a58938 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -19,6 +19,7 @@ from lark import UnexpectedCharacters, UnexpectedToken from . import load +from .utils import SerializationOptions from .version import __version__ @@ -58,7 +59,8 @@ def main(): else open(args.OUT_PATH, "w", encoding="utf-8") ) print(args.PATH, file=sys.stderr, flush=True) - json.dump(load(in_file, with_meta=args.with_meta), out_file) + options = SerializationOptions(with_meta=True) if args.with_meta else None + json.dump(load(in_file, serialization_options=options), out_file) if args.OUT_PATH is None: out_file.write("\n") out_file.close() diff --git a/hcl2/api.py b/hcl2/api.py index 399ba929..0238f418 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -1,67 +1,205 @@ -"""The API that will be exposed to users of this package""" -from typing import TextIO +"""The API that will be exposed to users of this package. + +Follows the json module convention: load/loads for reading, dump/dumps for writing. +Also exposes intermediate pipeline stages for advanced usage. 
+""" + +import json as _json +from typing import TextIO, Optional from lark.tree import Tree -from hcl2.parser import parser, reconstruction_parser -from hcl2.transformer import DictTransformer -from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter, FormatterOptions +from hcl2.parser import parser as _get_parser +from hcl2.reconstructor import HCLReconstructor +from hcl2.rules.base import StartRule +from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + + +# --------------------------------------------------------------------------- +# Primary API: load / loads / dump / dumps +# --------------------------------------------------------------------------- + + +def load( + file: TextIO, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load a HCL2 file and return a Python dict. + + :param file: File with HCL2 content. + :param serialization_options: Options controlling serialization behavior. + """ + return loads(file.read(), serialization_options=serialization_options) + + +def loads( + text: str, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load HCL2 from a string and return a Python dict. + + :param text: HCL2 text. + :param serialization_options: Options controlling serialization behavior. + """ + tree = parses(text) + return serialize(tree, serialization_options=serialization_options) + + +def dump( + data: dict, + file: TextIO, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> None: + """Write a Python dict as HCL2 to a file. + + :param data: Python dict (as produced by :func:`load`). + :param file: Writable text file. + :param deserializer_options: Options controlling deserialization behavior. 
+ :param formatter_options: Options controlling formatting behavior. + """ + file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) + + +def dumps( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> str: + """Convert a Python dict to an HCL2 string. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + """ + tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + return reconstruct(tree) + + +# --------------------------------------------------------------------------- +# Parsing: HCL text -> LarkElement tree or raw Lark tree +# --------------------------------------------------------------------------- + + +def parse(file: TextIO, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 file into a LarkElement tree. + + :param file: File with HCL2 content. + :param discard_comments: If True, discard comments during transformation. + """ + return parses(file.read(), discard_comments=discard_comments) + + +def parses(text: str, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 string into a LarkElement tree. + + :param text: HCL2 text. + :param discard_comments: If True, discard comments during transformation. + """ + lark_tree = parses_to_tree(text) + return transform(lark_tree, discard_comments=discard_comments) + + +def parse_to_tree(file: TextIO) -> Tree: + """Parse a HCL2 file into a raw Lark parse tree. -def load(file: TextIO, with_meta=False) -> dict: - """Load a HCL2 file. - :param file: File with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param file: File with HCL2 content. 
""" - return loads(file.read(), with_meta=with_meta) + return parses_to_tree(file.read()) + +def parses_to_tree(text: str) -> Tree: + """Parse a HCL2 string into a raw Lark parse tree. -def loads(text: str, with_meta=False) -> dict: - """Load HCL2 from a string. - :param text: Text with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param text: HCL2 text. """ - # append new line as a workaround for https://github.com/lark-parser/lark/issues/237 + # Append newline as workaround for https://github.com/lark-parser/lark/issues/237 # Lark doesn't support EOF token so our grammar can't look for "new line or end of file" - # This means that all blocks must end in a new line even if the file ends - # Append a new line as a temporary fix - tree = parser().parse(text + "\n") - return DictTransformer(with_meta=with_meta).transform(tree) + return _get_parser().parse(text + "\n") + + +# --------------------------------------------------------------------------- +# Intermediate pipeline stages +# --------------------------------------------------------------------------- -def parse(file: TextIO) -> Tree: - """Load HCL2 syntax tree from a file. - :param file: File with hcl2 to be loaded as a dict. +def from_dict( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a Python dict into a LarkElement tree. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. 
""" - return parses(file.read()) + deserializer = BaseDeserializer(deserializer_options) + tree = deserializer.load_python(data) + if format: + formatter = BaseFormatter(formatter_options) + formatter.format_tree(tree) + return tree + +def from_json( + text: str, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a JSON string into a LarkElement tree. -def parses(text: str) -> Tree: - """Load HCL2 syntax tree from a string. - :param text: Text with hcl2 to be loaded as a dict. + :param text: JSON string. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. """ - return reconstruction_parser().parse(text) + data = _json.loads(text) + return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + +def reconstruct(tree) -> str: + """Convert a LarkElement tree (or raw Lark tree) to an HCL2 string. -def transform(ast: Tree, with_meta=False) -> dict: - """Convert an HCL2 AST to a dictionary. - :param ast: HCL2 syntax tree, output from `parse` or `parses` - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param tree: A :class:`StartRule` (LarkElement tree) or :class:`lark.Tree`. """ - return DictTransformer(with_meta=with_meta).transform(ast) + reconstructor = HCLReconstructor() + if isinstance(tree, StartRule): + tree = tree.to_lark() + return reconstructor.reconstruct(tree) -def reverse_transform(hcl2_dict: dict) -> Tree: - """Convert a dictionary to an HCL2 AST. 
- :param hcl2_dict: a dictionary produced by `load` or `transform` +def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: + """Transform a raw Lark parse tree into a LarkElement tree. + + :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parses_to_tree`. + :param discard_comments: If True, discard comments during transformation. """ - return HCLReverseTransformer().transform(hcl2_dict) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + +def serialize( + tree: StartRule, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Serialize a LarkElement tree to a Python dict. -def writes(ast: Tree) -> str: - """Convert an HCL2 syntax tree to a string. - :param ast: HCL2 syntax tree, output from `parse` or `parses` + :param tree: A :class:`StartRule` (LarkElement tree). + :param serialization_options: Options controlling serialization behavior. """ - return HCLReconstructor(reconstruction_parser()).reconstruct(ast) + if serialization_options is not None: + return tree.serialize(options=serialization_options) + return tree.serialize() diff --git a/hcl2/builder.py b/hcl2/builder.py index b5b149da..5ef0c416 100644 --- a/hcl2/builder.py +++ b/hcl2/builder.py @@ -3,18 +3,16 @@ from collections import defaultdict -from hcl2.const import START_LINE_KEY, END_LINE_KEY +from hcl2.const import IS_BLOCK class Builder: """ The `hcl2.Builder` class produces a dictionary that should be identical to the - output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword - argument is important here. HCL "blocks" in the Python dictionary are - identified by the presence of `__start_line__` and `__end_line__` metadata - within them. The `Builder` class handles adding that metadata. If that metadata - is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to - identify what is a block and what is just an attribute with an object value.
+ output of `hcl2.load(example_file)`. HCL "blocks" in the Python dictionary are + identified by the presence of `__is_block__: True` markers within them. + The `Builder` class handles adding that marker. If that marker is missing, + the deserializer fails to distinguish blocks from regular object attributes. """ def __init__(self, attributes: Optional[dict] = None): @@ -49,8 +47,7 @@ def build(self): body.update( { - START_LINE_KEY: -1, - END_LINE_KEY: -1, + IS_BLOCK: True, **self.attributes, } ) @@ -79,7 +76,7 @@ def _add_nested_blocks( """Add nested blocks defined within another `Builder` instance to the `block` dictionary""" nested_block = nested_blocks_builder.build() for key, value in nested_block.items(): - if key not in (START_LINE_KEY, END_LINE_KEY): + if key != IS_BLOCK: if key not in block.keys(): block[key] = [] block[key].extend(value) diff --git a/hcl2/const.py b/hcl2/const.py index 1d46f35a..1bd4a4ce 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ START_LINE_KEY = "__start_line__" END_LINE_KEY = "__end_line__" +IS_BLOCK = "__is_block__" diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py new file mode 100644 index 00000000..a1f9733e --- /dev/null +++ b/hcl2/deserializer.py @@ -0,0 +1,347 @@ +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from functools import cached_property +from typing import Any, TextIO, List, Union + +from regex import regex + +from hcl2.parser import parser as _get_parser +from hcl2.const import IS_BLOCK +from hcl2.rules.abstract import LarkElement, LarkRule +from hcl2.rules.base import ( + BlockRule, + AttributeRule, + BodyRule, + StartRule, +) +from hcl2.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyRule, +) +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import ( + IdentifierRule, + IntLitRule, + FloatLitRule, +) +from hcl2.rules.strings 
import ( + StringRule, + InterpolationRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + NAME, + EQ, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + INTERP_START, + RBRACE, + IntLiteral, + FloatLiteral, + RSQB, + LSQB, + COMMA, + DOT, + LBRACE, + HEREDOC_TRIM_TEMPLATE, + HEREDOC_TEMPLATE, + COLON, +) +from hcl2.transformer import RuleTransformer +from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN + + +@dataclass +class DeserializerOptions: + heredocs_to_strings: bool = False + indent_length: int = 2 + object_elements_colon: bool = False + object_elements_trailing_comma: bool = True + + +class LarkElementTreeDeserializer(ABC): + def __init__(self, options: DeserializerOptions = None): + self.options = options or DeserializerOptions() + + @abstractmethod + def loads(self, value: str) -> LarkElement: + raise NotImplementedError() + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + +class BaseDeserializer(LarkElementTreeDeserializer): + def __init__(self, options=None): + super().__init__(options) + + @cached_property + def _transformer(self) -> RuleTransformer: + return RuleTransformer() + + def load_python(self, value: Any) -> LarkElement: + if isinstance(value, dict): + # Top-level dict is always a body (attributes + blocks), not an object + children = self._deserialize_block_elements(value) + result = StartRule([BodyRule(children)]) + else: + result = StartRule([self._deserialize(value)]) + return result + + def loads(self, value: str) -> LarkElement: + return self.load_python(json.loads(value)) + + def _deserialize(self, value: Any) -> LarkElement: + if isinstance(value, dict): + if self._contains_block_marker(value): + + children = [] + + block_elements = self._deserialize_block_elements(value) + for element in block_elements: + children.append(element) + + return BodyRule(children) + + return self._deserialize_object(value) + + if isinstance(value, 
list): + return self._deserialize_list(value) + + return self._deserialize_text(value) + + def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: + children = [] + for key, val in value.items(): + if self._is_block(val): + # this value is a list of blocks, iterate over each block and deserialize them + for block in val: + children.append(self._deserialize_block(key, block)) + + else: + # otherwise it's just an attribute + if key != IS_BLOCK: + children.append(self._deserialize_attribute(key, val)) + + return children + + def _deserialize_text(self, value: Any) -> LarkRule: + # bool must be checked before int since bool is a subclass of int + if isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + if isinstance(value, float): + return FloatLitRule([FloatLiteral(value)]) + + if isinstance(value, int): + return IntLitRule([IntLiteral(value)]) + + if isinstance(value, str): + if value.startswith('"') and value.endswith('"'): + if not self.options.heredocs_to_strings and value.startswith('"<<-'): + match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], True) + + if not self.options.heredocs_to_strings and value.startswith('"<<'): + match = HEREDOC_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], False) + + return self._deserialize_string(value) + + if self._is_expression(value): + return self._deserialize_expression(value) + + return self._deserialize_identifier(value) + + return self._deserialize_identifier(str(value)) + + def _deserialize_identifier(self, value: str) -> IdentifierRule: + return IdentifierRule([NAME(value)]) + + def _deserialize_string(self, value: str) -> StringRule: + result = [] + # split string into individual parts based on lark grammar + # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + + for part in parts: + if part == '"': + continue + + if part.startswith('"'): + part = part[1:] + if part.endswith('"'): + part = part[:-1] + + e = self._deserialize_string_part(part) + result.append(e) + + return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) + + def _deserialize_string_part(self, value: str) -> StringPartRule: + if value.startswith("$${") and value.endswith("}"): + return StringPartRule([ESCAPED_INTERPOLATION(value)]) + + if value.startswith("${") and value.endswith("}"): + return StringPartRule( + [ + InterpolationRule( + [INTERP_START(), self._deserialize_expression(value), RBRACE()] + ) + ] + ) + + return StringPartRule([STRING_CHARS(value)]) + + def _deserialize_heredoc( + self, value: str, trim: bool + ) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + if trim: + return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + + def _deserialize_expression(self, value: str) -> ExprTermRule: + """Deserialize an expression string into an ExprTermRule.""" + # instead of processing expression manually and trying to recognize what kind of expression it is, + # turn it into HCL2 code and parse it with lark: + + # unwrap from ${ and } + value = value[2:-1] + # create HCL2 snippet + value = f"temp = {value}" + # parse the above + parsed_tree = _get_parser().parse(value) + # transform parsed tree into LarkElement tree + rules_tree = self._transformer.transform(parsed_tree) + # extract expression from the tree + result = rules_tree.body.children[0].expression + + return result + + def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: + """Deserialize a block by extracting labels 
and body""" + labels = [first_label] + body = value + + # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) + while isinstance(body, dict) and not body.get(IS_BLOCK): + non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + if len(non_block_keys) == 1: + # This is another label level + label = non_block_keys[0] + labels.append(label) + body = body[label] + else: + # Multiple keys = this is the body + break + + return BlockRule( + [ + *[self._deserialize(label) for label in labels], + LBRACE(), + self._deserialize(body), + RBRACE(), + ] + ) + + def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + expr_term = self._deserialize(value) + + if not isinstance(expr_term, ExprTermRule): + expr_term = ExprTermRule([expr_term]) + + children = [ + self._deserialize_identifier(name), + EQ(), + expr_term, + ] + return AttributeRule(children) + + def _deserialize_list(self, value: List) -> TupleRule: + children = [] + for element in value: + deserialized = self._deserialize(element) + if not isinstance(deserialized, ExprTermRule): + # whatever an element of the list is, it has to be nested inside ExprTermRule + deserialized = ExprTermRule([deserialized]) + children.append(deserialized) + children.append(COMMA()) + + return TupleRule([LSQB(), *children, RSQB()]) + + def _deserialize_object(self, value: dict) -> ObjectRule: + children = [] + for key, val in value.items(): + children.append(self._deserialize_object_elem(key, val)) + + if self.options.object_elements_trailing_comma: + children.append(COMMA()) + + return ObjectRule([LBRACE(), *children, RBRACE()]) + + def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: + if self._is_expression(key): + key = ObjectElemKeyExpressionRule( + [ + child + for child in self._deserialize_expression(key).children + if child is not None + ] + ) + elif isinstance(key, str) and "." 
in key: + parts = key.split(".") + children = [] + for part in parts: + children.append(self._deserialize_identifier(part)) + children.append(DOT()) + key = ObjectElemKeyDotAccessor(children[:-1]) # without the trailing dot + else: + key = self._deserialize_text(key) + + result = [ + ObjectElemKeyRule([key]), + COLON() if self.options.object_elements_colon else EQ(), + ExprTermRule([self._deserialize(value)]), + ] + + return ObjectElemRule(result) + + def _is_expression(self, value: Any) -> bool: + return isinstance(value, str) and value.startswith("${") and value.endswith("}") + + def _is_block(self, value: Any) -> bool: + """Simple check: if it's a list containing dicts with IS_BLOCK markers""" + if not isinstance(value, list) or len(value) == 0: + return False + + # Check if any item in the list has IS_BLOCK marker (directly or nested) + for item in value: + if isinstance(item, dict) and self._contains_block_marker(item): + return True + + return False + + def _contains_block_marker(self, obj: dict) -> bool: + """Recursively check if a dict contains IS_BLOCK marker anywhere""" + if obj.get(IS_BLOCK): + return True + for value in obj.values(): + if isinstance(value, dict) and self._contains_block_marker(value): + return True + if isinstance(value, list): + for element in value: + if isinstance(element, dict) and self._contains_block_marker(element): + return True + return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py new file mode 100644 index 00000000..35fb6b05 --- /dev/null +++ b/hcl2/formatter.py @@ -0,0 +1,239 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List + +from hcl2.rules.abstract import LarkElement +from hcl2.rules.base import ( + StartRule, + BlockRule, + AttributeRule, + BodyRule, +) +from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.for_expressions import ( + ForTupleExprRule, +
ForObjectExprRule, +) +from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rules.whitespace import NewLineOrCommentRule + + +@dataclass +class FormatterOptions: + indent_length: int = 2 + open_empty_blocks: bool = True + open_empty_objects: bool = True + open_empty_tuples: bool = False + + vertically_align_attributes: bool = True + vertically_align_object_elements: bool = True + + +class LarkElementTreeFormatter(ABC): + def __init__(self, options: FormatterOptions = None): + self.options = options or FormatterOptions() + + @abstractmethod + def format_tree(self, tree: LarkElement): + raise NotImplementedError() + + +class BaseFormatter(LarkElementTreeFormatter): + def __init__(self, options: FormatterOptions = None): + super().__init__(options) + self._last_new_line: NewLineOrCommentRule = None + + def format_tree(self, tree: LarkElement): + if isinstance(tree, StartRule): + self.format_start_rule(tree) + + def format_start_rule(self, rule: StartRule): + self.format_body_rule(rule.body, 0) + + def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + if self.options.vertically_align_attributes: + self._vertically_align_attributes_in_body(rule.body) + + self.format_body_rule(rule.body, indent_level) + if len(rule.body.children) > 0: + rule.children.insert(-1, self._build_newline(indent_level - 1)) + elif self.options.open_empty_blocks: + rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) + + def format_body_rule(self, rule: BodyRule, indent_level: int = 0): + + in_start = isinstance(rule.parent, StartRule) + + new_children = [] + if not in_start: + new_children.append(self._build_newline(indent_level)) + + for i, child in enumerate(rule.children): + new_children.append(child) + + if isinstance(child, AttributeRule): + self.format_attribute_rule(child, indent_level) + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, BlockRule): + self.format_block_rule(child, indent_level + 1) 
+ + if i > 0: + new_children.insert(-2, self._build_newline(indent_level)) + new_children.append(self._build_newline(indent_level, 2)) + + if new_children: + new_children.pop(-1) + rule._children = new_children + + def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + self.format_expression(rule.expression, indent_level + 1) + + def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_tuples: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for child in rule.children: + new_children.append(child) + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + if isinstance(child, (COMMA, LSQB)): + new_children.append(self._build_newline(indent_level)) + + self._deindent_last_line() + rule._children = new_children + + def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_objects: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for i in range(len(rule.children)): + child = rule.children[i] + next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None + new_children.append(child) + + if isinstance(child, LBRACE): + new_children.append(self._build_newline(indent_level)) + + if ( + next_child + and isinstance(next_child, ObjectElemRule) + and isinstance(child, (ObjectElemRule, COMMA)) + ): + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, ObjectElemRule): + self.format_expression(child.expression, indent_level + 1) + + new_children.insert(-1, self._build_newline(indent_level)) + self._deindent_last_line() + + rule._children = new_children + + if self.options.vertically_align_object_elements: + self._vertically_align_object_elems(rule) + + def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + if 
isinstance(rule.expression, ObjectRule): + self.format_object_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, TupleRule): + self.format_tuple_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, ForTupleExprRule): + self.format_fortupleexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ForObjectExprRule): + self.format_forobjectexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ExprTermRule): + self.format_expression(rule.expression, indent_level) + + def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 5, 7] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + self._deindent_last_line() + + def format_forobjectexpr( + self, expression: ForObjectExprRule, indent_level: int = 0 + ): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 12] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + + self._deindent_last_line() + + def _vertically_align_attributes_in_body(self, body: BodyRule): + attributes_sequence: List[AttributeRule] = [] + + for child in body.children: + if isinstance(child, AttributeRule): + attributes_sequence.append(child) + + elif attributes_sequence: + self._align_attributes_sequence(attributes_sequence) + attributes_sequence = [] + + if attributes_sequence: + self._align_attributes_sequence(attributes_sequence) + + def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - 
name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + + def _vertically_align_object_elems(self, rule: ObjectRule): + max_length = max(len(elem.key.serialize()) for elem in rule.elements) + for elem in rule.elements: + key_length = len(elem.key.serialize()) + + spaces_to_add = max_length - key_length + + separator = elem.children[1] + if isinstance(separator, COLON): + spaces_to_add += 1 + + elem.children[1].set_value(" " * spaces_to_add + separator.value) + + def _build_newline( + self, next_line_indent: int = 0, count: int = 1 + ) -> NewLineOrCommentRule: + result = NewLineOrCommentRule( + [ + NL_OR_COMMENT( + ("\n" * count) + " " * self.options.indent_length * next_line_indent + ) + ] + ) + self._last_new_line = result + return result + + def _deindent_last_line(self, times: int = 1): + token = self._last_new_line.token + for i in range(times): + if token.value.endswith(" " * self.options.indent_length): + token.set_value(token.value[: -self.options.indent_length]) diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 78ba3ca6..4a9f1ec6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,27 +1,29 @@ -start : body -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? "{" body "}" -new_line_or_comment: ( NL_OR_COMMENT )+ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ -identifier : NAME | IN | FOR | IF | FOR_EACH -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +// Keywords IF : "if" IN : "in" FOR : "for" FOR_EACH : "for_each" -?expression : expr_term | operation | conditional -conditional : expression "?" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? 
expression +// Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) -?operation : unary_op | binary_op -!unary_op : ("-" | "!") expr_term -binary_op : expression binary_term new_line_or_comment? -!binary_operator : BINARY_OP -binary_term : binary_operator new_line_or_comment? expression -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +// Operators DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -35,74 +37,172 @@ PERCENT : "%" DOUBLE_AMP : "&&" DOUBLE_PIPE : "||" PLUS : "+" +NOT : "!" +QMARK : "?" + +// Punctuation LPAR : "(" RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" COMMA : "," DOT : "." +EQ : /[ \t]*=(?!=|>)/ COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
+attribute : _attribute_name EQ expression +_attribute_name : identifier | keyword +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional + | or_expr +interpolation: INTERP_START expression RBRACE + +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? 
add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator + +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term +!unary_op : (MINUS | NOT) expr_term + +// Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr -string: "\"" string_part* "\"" -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation -interpolation: "${" expression "}" -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ - -int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+ -!float_lit: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) ("." DECIMAL+)? (EXP_MARK) -NEGATIVE_DECIMAL : "-" DECIMAL -DECIMAL : "0".."9" -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -EQ : /[ \t]*=(?!=|>)/ - -tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? 
new_line_or_comment* "]" -object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}" +// Collections +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR object_elem_key_dot_accessor : identifier (DOT identifier)+ -heredoc_template : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -heredoc_template_trim : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM -function_call : identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" -arguments : (expression (new_line_or_comment* "," new_line_or_comment* expression)* ("," | "...")? new_line_or_comment*) -colons: "::" -provider_function_call: identifier colons identifier colons identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" +// Functions +function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) +// Indexing and attribute access index_expr_term : expr_term index get_attr_expr_term : expr_term get_attr attr_splat_expr_term : expr_term attr_splat full_splat_expr_term : expr_term full_splat -index : "[" new_line_or_comment? expression new_line_or_comment? "]" | "." DECIMAL+ -get_attr : "." 
identifier -attr_splat : ".*" get_attr* -full_splat : "[*]" (get_attr | index)* +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT (get_attr | index)* +full_splat : FULL_SPLAT_START (get_attr | index)* -FOR_OBJECT_ARROW : "=>" -!for_tuple_expr : "[" new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? "]" -!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" -!for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? -!for_cond : "if" new_line_or_comment? expression - -%ignore /[ \t]+/ +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/parser.py b/hcl2/parser.py index 79d50122..a33fe5f8 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -18,25 +18,3 @@ def parser() -> Lark: rel_to=__file__, propagate_positions=True, ) - - -@functools.lru_cache() -def reconstruction_parser() -> Lark: - """ - Build parser for transforming python structures into HCL2 text. 
- This is duplicated from `parser` because we need different options here for - the reconstructor. Please make sure changes are kept in sync between the two - if necessary. - """ - return Lark.open( - "hcl2.lark", - parser="lalr", - # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: - # - # https://github.com/lark-parser/lark/issues/1472 - # - # cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar - rel_to=__file__, - propagate_positions=True, - maybe_placeholders=False, # Needed for reconstruction - ) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 7f957d7b..1b5260ac 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,734 +1,238 @@ -"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" - -import re -from typing import List, Dict, Callable, Optional, Union, Any, Tuple - -from lark import Lark, Tree -from lark.grammar import Terminal, Symbol -from lark.lexer import Token, PatternStr, TerminalDef -from lark.reconstruct import Reconstructor -from lark.tree_matcher import is_discarded_terminal -from lark.visitors import Transformer_InPlace -from regex import regex - -from hcl2.const import START_LINE_KEY, END_LINE_KEY -from hcl2.parser import reconstruction_parser - - -# function to remove the backslashes within interpolated portions -def reverse_quotes_within_interpolation(interp_s: str) -> str: - """ - A common operation is to `json.dumps(s)` where s is a string to output in - HCL. This is useful for automatically escaping any quotes within the - string, but this escapes quotes within interpolation incorrectly. This - method removes any erroneous escapes within interpolated segments of a - string. 
- """ - return re.sub(r"\$\{(.*)}", lambda m: m.group(0).replace('\\"', '"'), interp_s) - - -class WriteTokensAndMetaTransformer(Transformer_InPlace): - """ - Inserts discarded tokens into their correct place, according to the rules - of grammar, and annotates with metadata during reassembly. The metadata - tracked here include the terminal which generated a particular string - output, and the rule that that terminal was matched on. - - This is a modification of lark.reconstruct.WriteTokensTransformer - """ - - tokens: Dict[str, TerminalDef] - term_subs: Dict[str, Callable[[Symbol], str]] - - def __init__( - self, - tokens: Dict[str, TerminalDef], - term_subs: Dict[str, Callable[[Symbol], str]], - ) -> None: - super().__init__() - self.tokens = tokens - self.term_subs = term_subs - - def __default__(self, data, children, meta): - """ - This method is called for every token the transformer visits. - """ - - if not getattr(meta, "match_tree", False): - return Tree(data, children) - iter_args = iter( - [child[2] if isinstance(child, tuple) else child for child in children] - ) - to_write = [] - for sym in meta.orig_expansion: - if is_discarded_terminal(sym): - try: - value = self.term_subs[sym.name](sym) - except KeyError as exc: - token = self.tokens[sym.name] - if not isinstance(token.pattern, PatternStr): - raise NotImplementedError( - f"Reconstructing regexps not supported yet: {token}" - ) from exc - - value = token.pattern.value - - # annotate the leaf with the specific rule (data) and terminal - # (sym) it was generated from - to_write.append((data, sym, value)) - else: - item = next(iter_args) - if isinstance(item, list): - to_write += item - else: - if isinstance(item, Token): - # annotate the leaf with the specific rule (data) and - # terminal (sym) it was generated from - to_write.append((data, sym, item)) - else: - to_write.append(item) - - return to_write - - -class HCLReconstructor(Reconstructor): +from typing import List, Union + +from lark import Tree, 
Token +from hcl2.rules import tokens +from hcl2.rules.base import BlockRule +from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) + + +class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - def __init__( - self, - parser: Lark, - term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None, - ): - Reconstructor.__init__(self, parser, term_subs) - - self.write_tokens: WriteTokensAndMetaTransformer = ( - WriteTokensAndMetaTransformer( - {token.name: token for token in self.tokens}, term_subs or {} - ) - ) - - # these variables track state during reconstruction to enable us to make - # informed decisions about formatting output. They are primarily used - # by the _should_add_space(...) method. - self._last_char_space = True - self._last_terminal: Union[Terminal, None] = None - self._last_rule: Union[Tree, Token, None] = None - self._deferred_item = None - - def should_be_wrapped_in_spaces(self, terminal: Terminal) -> bool: - """Whether given terminal should be wrapped in spaces""" - return terminal.name in { - "IF", - "IN", - "FOR", - "FOR_EACH", - "FOR_OBJECT_ARROW", - "COLON", - "QMARK", - "BINARY_OP", - } - - def _is_equals_sign(self, terminal) -> bool: - return ( - isinstance(self._last_rule, Token) - and self._last_rule.value in ("attribute", "object_elem") - and self._last_terminal == Terminal("EQ") - and terminal != Terminal("NL_OR_COMMENT") - ) - - # pylint: disable=too-many-branches, too-many-return-statements - def _should_add_space(self, rule, current_terminal, is_block_label: bool = False): - """ - This method documents the situations in which we add space around - certain tokens while reconstructing the generated HCL. 
- - Additional rules can be added here if the generated HCL has - improper whitespace (affecting parse OR affecting ability to perfectly - reconstruct a file down to the whitespace level.) - - It has the following information available to make its decision: - - - the last token (terminal) we output - - the last rule that token belonged to - - the current token (terminal) we're about to output - - the rule the current token belongs to - - This should be sufficient to make a spacing decision. - """ - - # we don't need to add multiple spaces - if self._last_char_space: + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } + + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: return False - # we don't add a space at the start of the file - if not self._last_terminal or not self._last_rule: + # Don't add space at the beginning + if self._last_token_name is None: return False - if self._is_equals_sign(current_terminal): - return True + if isinstance(current_node, Token): + token_type = current_node.type - if is_block_label and isinstance(rule, Token) and rule.value == "string": + # Space before '{' in blocks if ( - current_terminal == self._last_terminal == Terminal("DBLQUOTE") - or current_terminal == Terminal("DBLQUOTE") - and self._last_terminal == Terminal("NAME") + token_type == tokens.LBRACE.lark_name() + and 
parent_rule_name == BlockRule.lark_name() ): return True - # if we're in a ternary or binary operator, add space around the operator - if ( - isinstance(rule, Token) - and rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we just left a ternary or binary operator, add space around the - # operator unless there's a newline already - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a for or if statement and find a keyword, add a space - if ( - isinstance(rule, Token) - and rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we've just left a for or if statement and find a keyword, add a - # space, unless we have a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a block - if (isinstance(rule, Token) and rule.value == "block") or ( - isinstance(rule, str) and re.match(r"^__block_(star|plus)_.*", rule) - ): - # always add space before the starting brace - if current_terminal == Terminal("LBRACE"): + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): return True - # always add space before the closing brace - if current_terminal == Terminal( - "RBRACE" - ) and self._last_terminal != Terminal("LBRACE"): + # 
Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + return True - # always add space between string literals - if current_terminal == Terminal("STRING_CHARS"): + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... except before closing brackets or newlines + if token_type in (tokens.RSQB.lark_name(), "NL_OR_COMMENT"): + return False return True - # if we just opened a block, add a space, unless the block is empty - # or has a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value == "block" - and self._last_terminal == Terminal("LBRACE") - and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")] - ): - return True - - # if we're in a tuple or function arguments (this rule matches commas between items) - if isinstance(self._last_rule, str) and re.match( - r"^__(tuple|arguments)_(star|plus)_.*", self._last_rule - ): - - # string literals, decimals, and identifiers should always be - # preceded by a space if they're following a comma in a tuple or - # function arg - if current_terminal in [ - Terminal("DBLQUOTE"), - Terminal("DECIMAL"), - Terminal("NAME"), - Terminal("NEGATIVE_DECIMAL"), + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), ]: return True - # the catch-all case, we're not sure, so don't add a space - return False - - def _reconstruct(self, tree, is_block_label=False): - unreduced_tree = self.match_tree(tree, tree.data) - res = self.write_tokens.transform(unreduced_tree) - for item in res: - # any time we encounter a child tree, we recurse - if isinstance(item, Tree): - yield from self._reconstruct( - item, (unreduced_tree.data == "block" and item.data != "body") - ) - - # every leaf should be a tuple, which contains information about - # which terminal the leaf represents - elif isinstance(item, tuple): 
- rule, terminal, value = item - - # first, handle any deferred items - if self._deferred_item is not None: - ( - deferred_rule, - deferred_terminal, - deferred_value, - ) = self._deferred_item - - # if we deferred a comma and the next character ends a - # parenthesis or block, we can throw it out - if deferred_terminal == Terminal("COMMA") and terminal in [ - Terminal("RPAR"), - Terminal("RBRACE"), - ]: - pass - # in any other case, we print the deferred item - else: - yield deferred_value - - # and do our bookkeeping - self._last_terminal = deferred_terminal - self._last_rule = deferred_rule - if deferred_value and not deferred_value[-1].isspace(): - self._last_char_space = False - - # clear the deferred item - self._deferred_item = None - - # potentially add a space before the next token - if self._should_add_space(rule, terminal, is_block_label): - yield " " - self._last_char_space = True - - # potentially defer the item if needed - if terminal in [Terminal("COMMA")]: - self._deferred_item = item - else: - # otherwise print the next token - yield value - - # and do our bookkeeping so we can make an informed - # decision about formatting next time - self._last_terminal = terminal - self._last_rule = rule - if value: - self._last_char_space = value[-1].isspace() - - else: - raise RuntimeError(f"Unknown bare token type: {item}") - - def reconstruct(self, tree, postproc=None, insert_spaces=False): - """Convert a Lark.Tree AST back into a string representation of HCL.""" - return Reconstructor.reconstruct( - self, - tree, - postproc, - insert_spaces, - ) - - -class HCLReverseTransformer: - """ - The reverse of hcl2.transformer.DictTransformer. This method attempts to - convert a dict back into a working AST, which can be written back out. 
- """ - - @staticmethod - def _name_to_identifier(name: str) -> Tree: - """Converts a string to a NAME token within an identifier rule.""" - return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) - - @staticmethod - def _escape_interpolated_str(interp_s: str) -> str: - if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"): - # For heredoc strings, preserve their format exactly - return reverse_quotes_within_interpolation(interp_s) - # Escape backslashes first (very important to do this first) - escaped = interp_s.replace("\\", "\\\\") - # Escape quotes - escaped = escaped.replace('"', '\\"') - # Escape control characters - escaped = escaped.replace("\n", "\\n") - escaped = escaped.replace("\r", "\\r") - escaped = escaped.replace("\t", "\\t") - escaped = escaped.replace("\b", "\\b") - escaped = escaped.replace("\f", "\\f") - # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(f"{escaped}") - return interp_s - - @staticmethod - def _block_has_label(block: dict) -> bool: - return len(block.keys()) == 1 - - def __init__(self): - pass - - def transform(self, hcl_dict: dict) -> Tree: - """Given a dict, return a Lark.Tree representing the HCL AST.""" - level = 0 - body = self._transform_dict_to_body(hcl_dict, level) - start = Tree(Token("RULE", "start"), [body]) - return start - - @staticmethod - def _is_string_wrapped_tf(interp_s: str) -> bool: - """ - Determines whether a string is a complex HCL data structure - wrapped in ${ interpolation } characters. 
- """ - if not interp_s.startswith("${") or not interp_s.endswith("}"): - return False + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True - nested_tokens = [] - for match in re.finditer(r"\$?\{|}", interp_s): - if match.group(0) in ["${", "{"]: - nested_tokens.append(match.group(0)) - elif match.group(0) == "}": - nested_tokens.pop() - - # if we exit ${ interpolation } before the end of the string, - # this interpolated string has string parts and can't represent - # a valid HCL expression on its own (without quotes) - if len(nested_tokens) == 0 and match.end() != len(interp_s): - return False + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True - return True - - @classmethod - def _unwrap_interpolation(cls, value: str) -> str: - if cls._is_string_wrapped_tf(value): - return value[2:-1] - return value - - def _newline(self, level: int, count: int = 1) -> Tree: - return Tree( - Token("RULE", "new_line_or_comment"), - [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], - ) - - def _build_string_rule(self, string: str, level: int = 0) -> Tree: - # grammar in hcl2.lark defines that a string is built of any number of string parts, - # each string part can be either interpolation expression, escaped interpolation string - # or regular string - # this method build hcl2 string rule based on arbitrary string, - # splitting such string into individual parts and building a lark tree out of them - # - result = [] + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(string) if part != ""] - # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] - # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] - - for part in parts: - if part.startswith("$${") and part.endswith("}"): - result.append(Token("ESCAPED_INTERPOLATION", part)) - - # unwrap interpolation expression and recurse into it - elif part.startswith("${") and part.endswith("}"): - part = part[2:-1] - if part.startswith('"') and part.endswith('"'): - part = part[1:-1] - part = self._transform_value_to_expr_term(part, level) - else: - part = Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", part)])], - ) - - result.append(Tree(Token("RULE", "interpolation"), [part])) - - else: - result.append(Token("STRING_CHARS", part)) - - result = [Tree(Token("RULE", "string_part"), [element]) for element in result] - return Tree(Token("RULE", "string"), result) - - def _is_block(self, value: Any) -> bool: - if isinstance(value, dict): - block_body = value - if START_LINE_KEY in block_body.keys() or END_LINE_KEY in block_body.keys(): + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - try: - # if block is labeled, actual body might be nested - # pylint: disable=W0612 - block_label, block_body = next(iter(value.items())) - except StopIteration: - # no more potential labels = nothing more to check + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): return False - return self._is_block(block_body) + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): + return True - if isinstance(value, list): - if len(value) > 0: - return self._is_block(value[0]) + elif isinstance(current_node, Tree): + rule_name = current_node.data + + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + 
IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True return False - def _calculate_block_labels(self, block: dict) -> Tuple[List[str], dict]: - # if block doesn't have a label - if len(block.keys()) != 1: - return [], block - - # otherwise, find the label - curr_label = list(block)[0] - potential_body = block[curr_label] - - # __start_line__ and __end_line__ metadata are not labels - if ( - START_LINE_KEY in potential_body.keys() - or END_LINE_KEY in potential_body.keys() - ): - return [curr_label], potential_body - - # recurse and append the label - next_label, block_body = self._calculate_block_labels(potential_body) - return [curr_label] + next_label, block_body - - # pylint:disable=R0914 - def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: - # we add a newline at the top of a body within a block, not the root body - # >2 here is to ignore the __start_line__ and __end_line__ metadata - if level > 0 and len(hcl_dict) > 2: - children = [self._newline(level)] + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for 
child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + if len(token) != 0: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] else: - children = [] - - # iterate through each attribute or sub-block of this block - for key, value in hcl_dict.items(): - if key in [START_LINE_KEY, END_LINE_KEY]: - continue - - # construct the identifier, whether that be a block type name or an attribute key - identifier_name = self._name_to_identifier(key) - - # first, check whether the value is a "block" - if self._is_block(value): - for block_v in value: - block_labels, block_body_dict = self._calculate_block_labels( - block_v - ) - block_label_trees = [ - self._build_string_rule(block_label, level) - for block_label in block_labels - ] - block_body = self._transform_dict_to_body( - block_body_dict, level + 1 - ) - - # create our actual block to add to our own body - block = Tree( - Token("RULE", "block"), - [identifier_name] + block_label_trees + [block_body], - ) - children.append(block) - # add empty line after 
block - new_line = self._newline(level - 1) - # add empty line with indentation for next element in the block - new_line.children.append(self._newline(level).children[0]) - - children.append(new_line) - - # if the value isn't a block, it's an attribute - else: - expr_term = self._transform_value_to_expr_term(value, level) - attribute = Tree( - Token("RULE", "attribute"), - [identifier_name, Token("EQ", " ="), expr_term], - ) - children.append(attribute) - children.append(self._newline(level)) - - # since we're leaving a block body here, reduce the indentation of the - # final newline if it exists - if ( - len(children) > 0 - and isinstance(children[-1], Tree) - and children[-1].data.type == "RULE" - and children[-1].data.value == "new_line_or_comment" - ): - children[-1] = self._newline(level - 1) - - return Tree(Token("RULE", "body"), children) - - # pylint: disable=too-many-branches, too-many-return-statements too-many-statements - def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: - """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) - - Anything passed to this function is treated "naively". Any lists passed - are assumed to be tuples, and any dicts passed are assumed to be objects. - No more checks will be performed for either to see if they are "blocks" - as this check happens in `_transform_dict_to_body`. 
- """ - - # for lists, recursively turn the child elements into expr_terms and - # store within a tuple - if isinstance(value, list): - tuple_tree = Tree( - Token("RULE", "tuple"), - [ - self._transform_value_to_expr_term(tuple_v, level) - for tuple_v in value - ], - ) - return Tree(Token("RULE", "expr_term"), [tuple_tree]) - - if value is None: - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", "null")])], - ) - - # for dicts, recursively turn the child k/v pairs into object elements - # and store within an object - if isinstance(value, dict): - elements = [] - - # if the object has elements, put it on a newline - if len(value) > 0: - elements.append(self._newline(level + 1)) - - # iterate through the items and add them to the object - for i, (k, dict_v) in enumerate(value.items()): - if k in [START_LINE_KEY, END_LINE_KEY]: - continue - - value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1) - k = self._unwrap_interpolation(k) - elements.append( - Tree( - Token("RULE", "object_elem"), - [ - Tree( - Token("RULE", "object_elem_key"), - [Tree(Token("RULE", "identifier"), [Token("NAME", k)])], - ), - Token("EQ", " ="), - value_expr_term, - ], - ) - ) - - # add indentation appropriately - if i < len(value) - 1: - elements.append(self._newline(level + 1)) - else: - elements.append(self._newline(level)) - return Tree( - Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elements)] - ) - - # treat booleans appropriately - if isinstance(value, bool): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "identifier"), - [Token("NAME", "true" if value else "false")], - ) - ], - ) - - # store integers as literals, digit by digit - if isinstance(value, int): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "int_lit"), - [Token("DECIMAL", digit) for digit in str(value)], - ) - ], - ) - - if isinstance(value, float): - value = str(value) - literal = [] - - if 
value[0] == "-": - # pop two first chars - minus and a digit - literal.append(Token("NEGATIVE_DECIMAL", value[:2])) - value = value[2:] - - while value != "": - char = value[0] - - if char == ".": - # current char marks beginning of decimal part: pop all remaining chars and end the loop - literal.append(Token("DOT", char)) - literal.extend(Token("DECIMAL", char) for char in value[1:]) - break - - if char == "e": - # current char marks beginning of e-notation: pop all remaining chars and end the loop - literal.append(Token("EXP_MARK", value)) - break - - literal.append(Token("DECIMAL", char)) - value = value[1:] - - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "float_lit"), literal)], - ) - - # store strings as single literals - if isinstance(value, str): - # potentially unpack a complex syntax structure - if self._is_string_wrapped_tf(value): - # we have to unpack it by parsing it - wrapped_value = re.match(r"\$\{(.*)}", value).group(1) # type:ignore - ast = reconstruction_parser().parse(f"value = {wrapped_value}") - - if ast.data != Token("RULE", "start"): - raise RuntimeError("Token must be `start` RULE") - - body = ast.children[0] - if body.data != Token("RULE", "body"): - raise RuntimeError("Token must be `body` RULE") - - attribute = body.children[0] - if attribute.data != Token("RULE", "attribute"): - raise RuntimeError("Token must be `attribute` RULE") - - if attribute.children[1] != Token("EQ", " ="): - raise RuntimeError("Token must be `EQ (=)` rule") - - parsed_value = attribute.children[2] - return parsed_value - - # otherwise it's a string - return Tree( - Token("RULE", "expr_term"), - [self._build_string_rule(self._escape_interpolated_str(value), level)], - ) - - # otherwise, we don't know the type - raise RuntimeError(f"Unknown type to transform {type(value)}") + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into 
a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/py.typed b/hcl2/rules/__init__.py similarity index 100% rename from hcl2/py.typed rename to hcl2/rules/__init__.py diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py new file mode 100644 index 00000000..316c777a --- /dev/null +++ b/hcl2/rules/abstract.py @@ -0,0 +1,109 @@ +from abc import ABC, abstractmethod +from typing import Any, Union, List, Optional, Callable + +from lark import Token, Tree +from lark.tree import Meta + +from hcl2.utils import SerializationOptions, SerializationContext + + +class LarkElement(ABC): + @staticmethod + @abstractmethod + def lark_name() -> str: + raise NotImplementedError() + + def __init__(self, index: int = -1, parent: "LarkElement" = None): + self._index = index + self._parent = parent + + def set_index(self, i: int): + self._index = i + + def set_parent(self, node: "LarkElement"): + self._parent = node + + @abstractmethod + def to_lark(self) -> Any: + raise NotImplementedError() + + @abstractmethod + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement, ABC): + def __init__(self, value: Union[str, int, float]): + self._value = value + super().__init__() + + @property + @abstractmethod + def serialize_conversion(self) -> Callable: + raise NotImplementedError() + + @property + def value(self): + return self._value + + def set_value(self, value: Any): + self._value = value + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.serialize_conversion(self.value) + + 
def to_lark(self) -> Token:
+        """Build a lark ``Token`` typed by ``lark_name()`` carrying ``value``."""
+        return Token(self.lark_name(), self.value)
+
+    def __str__(self) -> str:
+        return str(self._value)
+
+    def __repr__(self) -> str:
+        # The previous implementation returned an empty string, which made
+        # tokens invisible in logs, debuggers and assertion diffs.
+        return f"<{type(self).__name__} {self.lark_name()} {self._value!r}>"
+
+
+class LarkRule(LarkElement, ABC):
+    """Base class for rule nodes: an element that owns a list of children.
+
+    On construction every non-``None`` child is told its position
+    (``set_index``) and its parent (``set_parent``). ``None`` entries are
+    kept in ``children`` but skipped when converting back with ``to_lark``.
+    """
+
+    @abstractmethod
+    def serialize(
+        self, options=SerializationOptions(), context=SerializationContext()
+    ) -> Any:
+        raise NotImplementedError()
+
+    @property
+    def children(self) -> List[LarkElement]:
+        return self._children
+
+    @property
+    def parent(self):
+        return self._parent
+
+    @property
+    def index(self):
+        return self._index
+
+    def to_lark(self) -> Tree:
+        """Convert this rule and its children into a lark ``Tree``.
+
+        ``None`` children are dropped from the resulting tree.
+        """
+        result_children = [
+            child.to_lark() for child in self._children if child is not None
+        ]
+        return Tree(self.lark_name(), result_children, meta=self._meta)
+
+    def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None):
+        """Store ``children`` and ``meta`` and link each child back to this rule.
+
+        :param children: child elements; entries may be ``None``.
+        :param meta: lark ``Meta``; a fresh ``Meta()`` is used when falsy.
+        """
+        super().__init__()
+        self._children = children
+        self._meta = meta or Meta()
+
+        for index, child in enumerate(children):
+            if child is not None:
+                child.set_index(index)
+                child.set_parent(self)
+
+    def __repr__(self):
+        # Kept shallow on purpose (child count, not child reprs) so that
+        # printing one node does not dump the whole subtree.
+        return (
+            f"<{type(self).__name__} {self.lark_name()} "
+            f"children={len(self._children)}>"
+        )
diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py
new file mode 100644
index 00000000..a025949a
--- /dev/null
+++ b/hcl2/rules/base.py
@@ -0,0 +1,152 @@
+from collections import defaultdict
+from typing import Tuple, Any, List, Union, Optional
+
+from lark.tree import Meta
+
+from hcl2.const import IS_BLOCK
+from hcl2.rules.abstract import LarkRule, LarkToken
+from hcl2.rules.expressions import ExpressionRule, ExprTermRule
+from hcl2.rules.literal_rules import IdentifierRule
+from hcl2.rules.strings import StringRule
+from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE
+
+from hcl2.rules.whitespace import NewLineOrCommentRule
+from hcl2.utils import SerializationOptions, SerializationContext
+
+
+class AttributeRule(LarkRule):
+    _children: Tuple[
+        IdentifierRule,
+        EQ,
+        ExprTermRule,
+    ]
+
+    @staticmethod
+    def lark_name() -> str:
+        return "attribute"
+
+    @property
+    def 
identifier(self) -> IdentifierRule:
+        return self._children[0]
+
+    @property
+    def expression(self) -> ExprTermRule:
+        return self._children[2]
+
+    def serialize(
+        self, options=SerializationOptions(), context=SerializationContext()
+    ) -> Any:
+        """Return a one-entry dict ``{identifier: expression}``.
+
+        ``context`` is forwarded so nested rules share the caller's state.
+        """
+        key = self.identifier.serialize(options, context)
+        value = self.expression.serialize(options, context)
+        return {key: value}
+
+
+class BodyRule(LarkRule):
+    """A sequence of attributes, blocks and newline/comment nodes."""
+
+    _children: List[
+        Union[
+            NewLineOrCommentRule,
+            AttributeRule,
+            "BlockRule",
+        ]
+    ]
+
+    @staticmethod
+    def lark_name() -> str:
+        return "body"
+
+    def serialize(
+        self, options=SerializationOptions(), context=SerializationContext()
+    ) -> Any:
+        """Serialize the body into a dict-like mapping.
+
+        Blocks are appended to a list stored under their first label;
+        attributes are merged in as ``name: value``. When
+        ``options.with_comments`` is set, collected comments are stored
+        under ``"__comments__"`` and ``"__inline_comments__"``.
+
+        :returns: a ``defaultdict(list)`` with the serialized content.
+        :raises RuntimeError: if a block uses the name of an attribute
+            defined earlier in the same body.
+        """
+        attribute_names = set()
+        comments = []
+        inline_comments = []
+
+        result = defaultdict(list)
+
+        for child in self._children:
+
+            if isinstance(child, BlockRule):
+                name = child.labels[0].serialize(options, context)
+                if name in attribute_names:
+                    raise RuntimeError(f"Attribute {name} is already defined.")
+                result[name].append(child.serialize(options, context))
+
+            if isinstance(child, AttributeRule):
+                serialized = child.serialize(options, context)
+                # Record the attribute *names* (the keys of the serialized
+                # dict). Adding the rule object itself, as before, meant the
+                # string comparison in the block branch could never match.
+                attribute_names.update(serialized)
+                result.update(serialized)
+                if options.with_comments:
+                    # collect in-line comments from attribute assignments, expressions etc
+                    inline_comments.extend(child.expression.inline_comments())
+
+            if isinstance(child, NewLineOrCommentRule) and options.with_comments:
+                child_comments = child.to_list()
+                if child_comments:
+                    comments.extend(child_comments)
+
+        if options.with_comments:
+            if comments:
+                result["__comments__"] = comments
+            if inline_comments:
+                result["__inline_comments__"] = inline_comments
+
+        return result
+
+
+class StartRule(LarkRule):
+    """Root rule; its single child is the top-level body."""
+
+    _children: Tuple[BodyRule]
+
+    @property
+    def body(self) -> BodyRule:
+        return self._children[0]
+
+    @staticmethod
+    def lark_name() -> str:
+        return "start"
+
+    def serialize(
+        self, options=SerializationOptions(), context=SerializationContext()
+    ) -> Any:
+        """Serialize the top-level body, forwarding ``options`` and ``context``."""
+        return self.body.serialize(options, context)
+
+
+class BlockRule(LarkRule):
+
+    _children: Tuple[
+        IdentifierRule,
+        
Optional[Union[IdentifierRule, StringRule]],
+        LBRACE,
+        BodyRule,
+        RBRACE,
+    ]
+
+    def __init__(self, children, meta: Optional[Meta] = None):
+        """Split the non-token children into labels and the trailing body."""
+        super().__init__(children, meta)
+
+        # Everything that is not a token is a label, except the last entry,
+        # which is the body. ``None`` placeholders are not tokens, so they
+        # can end up in ``_labels``; the ``labels`` property filters them.
+        *self._labels, self._body = [
+            child for child in children if not isinstance(child, LarkToken)
+        ]
+
+    @staticmethod
+    def lark_name() -> str:
+        return "block"
+
+    @property
+    def labels(self) -> List[NAME]:
+        """Return the block labels with ``None`` placeholders removed."""
+        return [label for label in self._labels if label is not None]
+
+    @property
+    def body(self) -> BodyRule:
+        return self._body
+
+    def serialize(
+        self, options=SerializationOptions(), context=SerializationContext()
+    ) -> Any:
+        """Serialize the body and nest it under every label after the first.
+
+        For labels ``[a, b, c]`` the result is ``{b: {c: body}}``; the first
+        label is not used here. When ``options.explicit_blocks`` is set the
+        body dict additionally gets ``IS_BLOCK: True``.
+        """
+        result = self._body.serialize(options, context)
+        if options.explicit_blocks:
+            result.update({IS_BLOCK: True})
+
+        # Iterate the filtered ``labels`` view: the raw ``_labels`` list may
+        # hold ``None`` entries, and calling ``serialize`` on them would fail.
+        for label in reversed(self.labels[1:]):
+            result = {label.serialize(options, context): result}
+
+        return result
diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py
new file mode 100644
index 00000000..3f590c5c
--- /dev/null
+++ b/hcl2/rules/containers.py
@@ -0,0 +1,232 @@
+from typing import Tuple, List, Optional, Union, Any
+
+from hcl2.rules.abstract import LarkRule
+from hcl2.rules.expressions import ExpressionRule
+from hcl2.rules.literal_rules import (
+    FloatLitRule,
+    IntLitRule,
+    IdentifierRule,
+)
+from hcl2.rules.strings import StringRule
+from hcl2.rules.tokens import (
+    COLON,
+    EQ,
+    LBRACE,
+    COMMA,
+    RBRACE,
+    LSQB,
+    RSQB,
+    LPAR,
+    RPAR,
+    DOT,
+)
+from hcl2.rules.whitespace import (
+    NewLineOrCommentRule,
+    InlineCommentMixIn,
+)
+from hcl2.utils import (
+    SerializationOptions,
+    SerializationContext,
+    to_dollar_string,
+)
+
+
+class TupleRule(InlineCommentMixIn):
+
+    _children: Tuple[
+        LSQB,
+        Optional[NewLineOrCommentRule],
+        Tuple[
+            ExpressionRule,
+            Optional[NewLineOrCommentRule],
+            COMMA,
+            Optional[NewLineOrCommentRule],
+            # ...
+ ], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "tuple" + + @property + def elements(self) -> List[ExpressionRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ExpressionRule) + ] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_tuples and not context.inside_dollar_string: + return [element.serialize(options, context) for element in self.elements] + + with context.modify(inside_dollar_string=True): + result = "[" + result += ", ".join( + str(element.serialize(options, context)) for element in self.elements + ) + result += "]" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +class ObjectElemKeyRule(LarkRule): + + key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] + + _children: Tuple[key_T] + + @staticmethod + def lark_name() -> str: + return "object_elem_key" + + @property + def value(self) -> key_T: + return self._children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = self.value.serialize(options, context) + # Object keys must be strings for JSON compatibility + if isinstance(result, (int, float)): + result = str(result) + return result + + +class ObjectElemKeyExpressionRule(LarkRule): + + _children: Tuple[ + LPAR, + ExpressionRule, + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_expression" + + @property + def expression(self) -> ExpressionRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"({self.expression.serialize(options, context)})" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return 
result + + +class ObjectElemKeyDotAccessor(LarkRule): + + _children: Tuple[ + IdentifierRule, + Tuple[ + IdentifierRule, + DOT, + ], + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_dot_accessor" + + @property + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".".join( + identifier.serialize(options, context) for identifier in self.identifiers + ) + + +class ObjectElemRule(LarkRule): + + _children: Tuple[ + ObjectElemKeyRule, + Union[EQ, COLON], + ExpressionRule, + ] + + @staticmethod + def lark_name() -> str: + return "object_elem" + + @property + def key(self) -> ObjectElemKeyRule: + return self._children[0] + + @property + def expression(self): + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return { + self.key.serialize(options, context): self.expression.serialize( + options, context + ) + } + + +class ObjectRule(InlineCommentMixIn): + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + Tuple[ + ObjectElemRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + ], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "object" + + @property + def elements(self) -> List[ObjectElemRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) + ] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_objects and not context.inside_dollar_string: + result = {} + for element in self.elements: + result.update(element.serialize(options, context)) + + return result + + with context.modify(inside_dollar_string=True): + result = "{" + result += ", ".join( + f"{element.key.serialize(options, context)} = 
{element.expression.serialize(options,context)}" + for element in self.elements + ) + result += "}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py new file mode 100644 index 00000000..1e1d0cd8 --- /dev/null +++ b/hcl2/rules/expressions.py @@ -0,0 +1,258 @@ +from abc import ABC +from copy import deepcopy +from typing import Any, Tuple, Optional + +from lark.tree import Meta + +from hcl2.rules.abstract import ( + LarkToken, +) +from hcl2.rules.literal_rules import BinaryOperatorRule +from hcl2.rules.tokens import LPAR, RPAR, QMARK, COLON +from hcl2.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.utils import ( + wrap_into_parentheses, + to_dollar_string, + SerializationOptions, + SerializationContext, +) + + +class ExpressionRule(InlineCommentMixIn, ABC): + @staticmethod + def lark_name() -> str: + return "expression" + + def __init__( + self, children, meta: Optional[Meta] = None, parentheses: bool = False + ): + super().__init__(children, meta) + self._parentheses = parentheses + + def _wrap_into_parentheses( + self, value: str, options=SerializationOptions(), context=SerializationContext() + ) -> str: + # do not wrap into parentheses if + # 1. already wrapped or + # 2. 
is top-level expression (unless explicitly wrapped) + if context.inside_parentheses: + return value + # Look through ExprTermRule wrapper to determine if truly nested + parent = getattr(self, "parent", None) + if parent is None: + return value + if isinstance(parent, ExprTermRule): + if not isinstance(parent.parent, ExpressionRule): + return value + elif not isinstance(parent, ExpressionRule): + return value + return wrap_into_parentheses(value) + + +class ExprTermRule(ExpressionRule): + + type_ = Tuple[ + Optional[LPAR], + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[RPAR], + ] + + _children: type_ + + @staticmethod + def lark_name() -> str: + return "expr_term" + + def __init__(self, children, meta: Optional[Meta] = None): + parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].lark_name() == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].lark_name() == "RPAR" + ): + parentheses = True + else: + children = [None, *children, None] + self._insert_optionals(children, [1, 3]) + super().__init__(children, meta, parentheses) + + @property + def parentheses(self) -> bool: + return self._parentheses + + @property + def expression(self) -> ExpressionRule: + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify( + inside_parentheses=self.parentheses or context.inside_parentheses + ): + result = self.expression.serialize(options, context) + + if self.parentheses: + result = wrap_into_parentheses(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +class ConditionalRule(ExpressionRule): + + _children: Tuple[ + ExpressionRule, + QMARK, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ExpressionRule, + ] + + @staticmethod + def lark_name() -> str: + 
return "conditional" + + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children, [2, 4, 6]) + super().__init__(children, meta) + + @property + def condition(self) -> ExpressionRule: + return self._children[0] + + @property + def if_true(self) -> ExpressionRule: + return self._children[3] + + @property + def if_false(self) -> ExpressionRule: + return self._children[7] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = ( + f"{self.condition.serialize(options, context)} " + f"? {self.if_true.serialize(options, context)} " + f": {self.if_false.serialize(options, context)}" + ) + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + + return result + + +class BinaryTermRule(ExpressionRule): + + _children: Tuple[ + BinaryOperatorRule, + Optional[NewLineOrCommentRule], + ExprTermRule, + ] + + @staticmethod + def lark_name() -> str: + return "binary_term" + + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children, [1]) + super().__init__(children, meta) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" + + +class BinaryOpRule(ExpressionRule): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + 
return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + lhs = self.expr_term.serialize(options, context) + operator = self.binary_term.binary_operator.serialize(options, context) + rhs = self.binary_term.expr_term.serialize(options, context) + + result = f"{lhs} {operator} {rhs}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + return result + + +class UnaryOpRule(ExpressionRule): + + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def lark_name() -> str: + return "unary_op" + + @property + def operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + result = f"{self.operator}{self.expr_term.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + + return result diff --git a/hcl2/rules/for_expressions.py b/hcl2/rules/for_expressions.py new file mode 100644 index 00000000..a1f24dcb --- /dev/null +++ b/hcl2/rules/for_expressions.py @@ -0,0 +1,299 @@ +from typing import Any, Tuple, Optional, List + +from lark.tree import Meta + +from hcl2.rules.abstract import LarkRule, LarkElement +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.utils 
import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ForIntroRule(InlineCommentMixIn): + """Rule for the intro part of for expressions: 'for key, value in collection :'""" + + _children: Tuple[ + FOR, + Optional[NewLineOrCommentRule], + IdentifierRule, + Optional[COMMA], + Optional[IdentifierRule], + Optional[NewLineOrCommentRule], + IN, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "for_intro" + + def __init__(self, children, meta: Optional[Meta] = None): + + self._insert_optionals(children) + super().__init__(children, meta) + + def _insert_optionals(self, children: List, indexes: List[int] = None): + identifiers = [child for child in children if isinstance(child, IdentifierRule)] + second_identifier = identifiers[1] if len(identifiers) == 2 else None + + indexes = [1, 5, 7, 9, 11] + if second_identifier is None: + indexes.extend([3, 4]) + + super()._insert_optionals(children, sorted(indexes)) + + if second_identifier is not None: + children[3] = COMMA() + children[4] = second_identifier + + @property + def first_iterator(self) -> IdentifierRule: + """Returns the first iterator""" + return self._children[2] + + @property + def second_iterator(self) -> Optional[IdentifierRule]: + """Returns the second iterator or None if not present""" + return self._children[4] + + @property + def iterable(self) -> ExpressionRule: + """Returns the collection expression being iterated over""" + return self._children[8] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + result = "for " + + result += f"{self.first_iterator.serialize(options, context)}" + if self.second_iterator: + result += f", {self.second_iterator.serialize(options, context)}" + + result += f" in {self.iterable.serialize(options, context)} : " + return result + + +class 
ForCondRule(InlineCommentMixIn): + """Rule for the optional condition in for expressions: 'if condition'""" + + _children: Tuple[ + IF, + Optional[NewLineOrCommentRule], + ExpressionRule, # condition expression + ] + + @staticmethod + def lark_name() -> str: + return "for_cond" + + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children, [1]) + super().__init__(children, meta) + + @property + def condition_expr(self) -> ExpressionRule: + """Returns the condition expression""" + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + return f"if {self.condition_expr.serialize(options, context)}" + + +class ForTupleExprRule(ExpressionRule): + """Rule for tuple/array for expressions: [for item in items : expression]""" + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "for_tuple_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children) + super().__init__(children, meta) + + def _insert_optionals(self, children: List, indexes: List[int] = None): + condition = None + + for child in children: + if isinstance(child, ForCondRule): + condition = child + break + + indexes = [1, 3, 5, 7] + + if condition is None: + indexes.append(6) + + super()._insert_optionals(children, sorted(indexes)) + + children[6] = condition + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[4] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[6] + + def 
serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "[" + + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += self.value_expr.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "]" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ForObjectExprRule(ExpressionRule): + """Rule for object for expressions: {for key, value in items : key => value}""" + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + FOR_OBJECT_ARROW, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ELLIPSIS], + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "for_object_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children) + super().__init__(children, meta) + + def _insert_optionals(self, children: List, indexes: List[int] = None): + ellipsis_ = None + condition = None + + for child in children: + if ellipsis_ is None and isinstance(child, ELLIPSIS): + ellipsis_ = child + if condition is None and isinstance(child, ForCondRule): + condition = child + + indexes = [1, 3, 6, 8, 10, 12] + + if ellipsis_ is None: + indexes.append(9) + if condition is None: + indexes.append(11) + + super()._insert_optionals(children, sorted(indexes)) + + children[9] = ellipsis_ + children[11] = condition + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def key_expr(self) -> ExpressionRule: + """Returns the key expression""" + return self._children[4] + + @property + def value_expr(self) -> 
ExpressionRule: + """Returns the value expression""" + return self._children[7] + + @property + def ellipsis(self) -> Optional[ELLIPSIS]: + """Returns the optional ellipsis token""" + return self._children[9] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[11] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "{" + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += f"{self.key_expr.serialize(options, context)} => " + + result += self.value_expr.serialize( + SerializationOptions(wrap_objects=True), context + ) + if self.ellipsis is not None: + result += self.ellipsis.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py new file mode 100644 index 00000000..380b959b --- /dev/null +++ b/hcl2/rules/functions.py @@ -0,0 +1,116 @@ +from functools import lru_cache +from typing import Any, Optional, Tuple, Union, List + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ArgumentsRule(InlineCommentMixIn): + + _children: Tuple[ + ExpressionRule, + Tuple[ + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ExpressionRule, + # ... 
+ ], + Optional[Union[COMMA, ELLIPSIS]], + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "arguments" + + @property + @lru_cache(maxsize=None) + def has_ellipsis(self) -> bool: + for child in self._children[-2:]: + if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": + return True + return False + + @property + def arguments(self) -> List[ExpressionRule]: + return [child for child in self._children if isinstance(child, ExpressionRule)] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = ", ".join( + [str(argument.serialize(options, context)) for argument in self.arguments] + ) + if self.has_ellipsis: + result += " ..." + return result + + +class FunctionCallRule(InlineCommentMixIn): + + _children: Tuple[ + IdentifierRule, + Optional[IdentifierRule], + Optional[IdentifierRule], + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "function_call" + + @property + @lru_cache(maxsize=None) + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + @property + @lru_cache(maxsize=None) + def arguments(self) -> Optional[ArgumentsRule]: + for child in self._children[2:6]: + if isinstance(child, ArgumentsRule): + return child + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +class ProviderFunctionCallRule(FunctionCallRule): + _children: Tuple[ + IdentifierRule, + IdentifierRule, + 
class ShortIndexRule(LarkRule):
    """Rule node for ``short_index``: a dot followed by an integer literal."""

    _children: Tuple[
        DOT,
        IntLiteral,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "short_index"

    @property
    def index(self):
        """Return the second child, the integer literal token."""
        return self.children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``.<index>``, forwarding both options and context."""
        return f".{self.index.serialize(options, context)}"


class SqbIndexRule(InlineCommentMixIn):
    """Rule node for ``braces_index``: an expression in square brackets."""

    _children: Tuple[
        LSQB,
        Optional[NewLineOrCommentRule],
        ExprTermRule,
        Optional[NewLineOrCommentRule],
        RSQB,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        # Pad the optional newline/comment slots (positions 1 and 3) with
        # None so the index expression is always found at position 2.
        self._insert_optionals(children, [1, 3])
        super().__init__(children, meta)

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "braces_index"

    @property
    def index_expression(self):
        """Return the third child, the expression between the brackets."""
        return self.children[2]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``[<expression>]``.

        The caller's context is forwarded so the inner expression sees the
        same ``inside_dollar_string`` state as the enclosing rule.
        """
        return f"[{self.index_expression.serialize(options, context)}]"


class IndexExprTermRule(ExpressionRule):
    """Rule node for ``index_expr_term``: an expression followed by an index."""

    _children: Tuple[ExprTermRule, SqbIndexRule]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "index_expr_term"

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``<expr><index>``, wrapped by ``to_dollar_string`` at top level."""
        with context.modify(inside_dollar_string=True):
            # Pass the modified context down; without it the children fall
            # back to their own default context and never see the flag that
            # was just set.
            target = self.children[0].serialize(options, context)
            index = self.children[1].serialize(options, context)
            result = f"{target}{index}"
        if not context.inside_dollar_string:
            result = to_dollar_string(result)
        return result


class GetAttrRule(LarkRule):
    """Rule node for ``get_attr``: a dot followed by an identifier."""

    _children: Tuple[
        DOT,
        IdentifierRule,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "get_attr"

    @property
    def identifier(self) -> IdentifierRule:
        """Return the second child, the attribute name."""
        return self._children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``.<identifier>``."""
        return f".{self.identifier.serialize(options, context)}"


class GetAttrExprTermRule(ExpressionRule):
    """Rule node for ``get_attr_expr_term``: an expression plus ``.attr``."""

    _children: Tuple[
        ExprTermRule,
        GetAttrRule,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "get_attr_expr_term"

    @property
    def expr_term(self) -> ExprTermRule:
        """Return the first child, the expression being accessed."""
        return self._children[0]

    @property
    def get_attr(self) -> GetAttrRule:
        """Return the second child, the attribute access."""
        return self._children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``<expr>.<attr>``, wrapped by ``to_dollar_string`` at top level."""
        with context.modify(inside_dollar_string=True):
            target = self.expr_term.serialize(options, context)
            accessor = self.get_attr.serialize(options, context)
            result = f"{target}{accessor}"
        if not context.inside_dollar_string:
            result = to_dollar_string(result)
        return result


class AttrSplatRule(LarkRule):
    """Rule node for ``attr_splat``: ``.*`` followed by accessors."""

    _children: Tuple[
        ATTR_SPLAT,
        Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...],
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "attr_splat"

    @property
    def get_attrs(
        self,
    ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]:
        """Return every child after the leading splat token."""
        return self._children[1:]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``.*`` followed by each serialized accessor."""
        return ".*" + "".join(
            get_attr.serialize(options, context) for get_attr in self.get_attrs
        )
class AttrSplatExprTermRule(ExpressionRule):
    """Rule node for ``attr_splat_expr_term``: an expression plus ``.*`` splat."""

    _children: Tuple[ExprTermRule, AttrSplatRule]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "attr_splat_expr_term"

    @property
    def expr_term(self) -> ExprTermRule:
        """Return the first child, the expression being splatted."""
        return self._children[0]

    @property
    def attr_splat(self) -> AttrSplatRule:
        """Return the second child, the splat accessor chain."""
        return self._children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``<expr>.*<accessors>``, wrapped at top level only."""
        with context.modify(inside_dollar_string=True):
            head = self.expr_term.serialize(options, context)
            tail = self.attr_splat.serialize(options, context)
            rendered = f"{head}{tail}"

        # The flag is read after the ``with`` block, i.e. as the caller set it.
        if context.inside_dollar_string:
            return rendered
        return to_dollar_string(rendered)


class FullSplatRule(LarkRule):
    """Rule node for ``full_splat``: ``[*]`` followed by accessors."""

    # NOTE(review): the first slot is annotated ATTR_SPLAT although
    # serialize() emits "[*]"; presumably FULL_SPLAT was intended - confirm.
    _children: Tuple[
        ATTR_SPLAT,
        Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...],
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "full_splat"

    @property
    def get_attrs(
        self,
    ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]:
        """Return every child after the leading splat token."""
        return self._children[1:]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``[*]`` followed by each serialized accessor."""
        accessors = "".join(
            accessor.serialize(options, context) for accessor in self.get_attrs
        )
        return "[*]" + accessors


class FullSplatExprTermRule(ExpressionRule):
    """Rule node for ``full_splat_expr_term``: an expression plus ``[*]`` splat."""

    _children: Tuple[ExprTermRule, FullSplatRule]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "full_splat_expr_term"

    @property
    def expr_term(self) -> ExprTermRule:
        """Return the first child, the expression being splatted."""
        return self._children[0]

    @property
    def attr_splat(self) -> FullSplatRule:
        """Return the second child, the full-splat accessor chain."""
        return self._children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return ``<expr>[*]<accessors>``, wrapped at top level only."""
        with context.modify(inside_dollar_string=True):
            head = self.expr_term.serialize(options, context)
            tail = self.attr_splat.serialize(options, context)
            rendered = f"{head}{tail}"

        # The flag is read after the ``with`` block, i.e. as the caller set it.
        if context.inside_dollar_string:
            return rendered
        return to_dollar_string(rendered)
class TokenRule(LarkRule, ABC):
    """Base class for rules that wrap exactly one token."""

    _children: Tuple[LarkToken]

    @property
    def token(self) -> LarkToken:
        """Return the single wrapped token (the first child)."""
        return self._children[0]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the wrapped token's own serialization.

        ``options`` and ``context`` are accepted for signature compatibility
        but are not passed to the token.
        """
        wrapped = self.token
        return wrapped.serialize()


class KeywordRule(TokenRule):
    """Token rule for the ``keyword`` grammar rule."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "keyword"


class IdentifierRule(TokenRule):
    """Token rule for the ``identifier`` grammar rule."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "identifier"


class IntLitRule(TokenRule):
    """Token rule for the ``int_lit`` grammar rule."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "int_lit"


class FloatLitRule(TokenRule):
    """Token rule for the ``float_lit`` grammar rule."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "float_lit"


class BinaryOperatorRule(TokenRule):
    """Token rule for the ``binary_operator`` grammar rule."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "binary_operator"
class InterpolationRule(LarkRule):
    """Rule node for ``interpolation``: an expression between ``${`` and ``}``."""

    _children: Tuple[
        INTERP_START,
        ExpressionRule,
        RBRACE,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "interpolation"

    @property
    def expression(self):
        """Return the second child, the interpolated expression."""
        return self.children[1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the expression wrapped by ``to_dollar_string``.

        The expression is serialized with ``inside_dollar_string`` enabled
        on the caller's context, so the wrapping is applied here and not
        repeated by the nested expression.
        """
        with context.modify(inside_dollar_string=True):
            inner = self.expression.serialize(options, context)
        return to_dollar_string(inner)


class StringPartRule(LarkRule):
    """Rule node for ``string_part``: one literal chunk or interpolation."""

    _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "string_part"

    @property
    def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]:
        """Return the single child held by this part."""
        return self._children[0]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the serialized child."""
        return self.content.serialize(options, context)


class StringRule(LarkRule):
    """Rule node for ``string``: string parts between two double quotes."""

    _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "string"

    @property
    def string_parts(self):
        """Return the children between the opening and closing quote."""
        return self.children[1:-1]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the concatenated parts surrounded by double quotes.

        ``options`` and ``context`` are forwarded to every part so that
        caller-supplied settings reach interpolated expressions instead of
        being replaced by the defaults.
        """
        body = "".join(
            part.serialize(options, context) for part in self.string_parts
        )
        return f'"{body}"'


class HeredocTemplateRule(LarkRule):
    """Rule node for ``heredoc_template`` (``<<MARKER`` heredocs)."""

    _children: Tuple[HEREDOC_TEMPLATE]
    # Characters stripped from the end of the heredoc text.
    _trim_chars = "\n\t "

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "heredoc_template"

    @property
    def heredoc(self):
        """Return the heredoc token (the first child)."""
        return self.children[0]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the heredoc as a double-quoted string.

        Unless ``options.preserve_heredocs`` is set, only the second group
        of ``HEREDOC_PATTERN`` is kept. Trailing newlines, tabs and spaces
        are always removed.

        :raises RuntimeError: if the token does not match ``HEREDOC_PATTERN``.
        """
        heredoc = self.heredoc.serialize(options, context)

        if not options.preserve_heredocs:
            match = HEREDOC_PATTERN.match(heredoc)
            if not match:
                raise RuntimeError(f"Invalid Heredoc token: {heredoc}")
            heredoc = match.group(2)

        result = heredoc.rstrip(self._trim_chars)
        return f'"{result}"'


class HeredocTrimTemplateRule(HeredocTemplateRule):
    """Rule node for ``heredoc_trim_template`` (``<<-MARKER`` heredocs)."""

    _children: Tuple[HEREDOC_TRIM_TEMPLATE]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "heredoc_trim_template"

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the heredoc as a quoted string with common indentation removed.

        See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions
        This is the heredoc form declared with "<<-": the smallest number of
        leading spaces over all lines is computed and that many characters
        are removed from the start of every line.

        :raises RuntimeError: if the token does not match
            ``HEREDOC_TRIM_PATTERN``.
        """
        heredoc = self.heredoc.serialize(options, context)

        if not options.preserve_heredocs:
            match = HEREDOC_TRIM_PATTERN.match(heredoc)
            if not match:
                raise RuntimeError(f"Invalid Heredoc token: {heredoc}")
            heredoc = match.group(2)

        heredoc = heredoc.rstrip(self._trim_chars)
        # ``str.split`` always yields at least one element, so ``min`` below
        # never sees an empty sequence.
        lines = heredoc.split("\n")

        # Smallest count of leading spaces (tabs are not counted).
        min_spaces = min(len(line) - len(line.lstrip(" ")) for line in lines)

        trimmed = [line[min_spaces:] for line in lines]
        return '"' + "\n".join(trimmed) + '"'
class StringToken(LarkToken):
    """
    Single run-time base class; every `StringToken["..."]` call returns a
    cached subclass whose static `lark_name()` yields the given string.
    """

    @classmethod
    @lru_cache(maxsize=None)
    def __build_subclass(cls, name: str) -> Type["StringToken"]:
        """Create a subclass with a constant `lark_name`."""
        namespace = {
            "__slots__": (),
            # The default argument binds ``name`` at creation time.
            "lark_name": staticmethod(lambda _n=name: _n),
        }
        return type(f"{name}_TOKEN", (StringToken,), namespace)  # type: ignore

    def __class_getitem__(cls, name: str) -> Type["StringToken"]:
        """Return the cached subclass for ``name``; reject non-string keys."""
        if isinstance(name, str):
            return cls.__build_subclass(name)
        raise TypeError("StringToken[...] expects a single str argument")

    def __init__(self, value: Optional[Any] = None):
        super().__init__(value)

    @property
    def serialize_conversion(self) -> Callable[[Any], str]:
        """Return ``str`` as the conversion applied when serializing."""
        return str


class StaticStringToken(LarkToken):
    """Token base whose subclasses carry a fixed default value.

    ``StaticStringToken[(name, value)]`` returns a cached subclass and
    records it in ``classes_by_value`` under ``value``.
    """

    # Maps a default value (e.g. "=") to the subclass created for it.
    classes_by_value = {}

    @classmethod
    @lru_cache(maxsize=None)
    def __build_subclass(
        cls, name: str, default_value: Optional[str] = None
    ) -> Type["StaticStringToken"]:
        """Create a subclass with a constant `lark_name`."""
        namespace = {
            "__slots__": (),
            "lark_name": staticmethod(lambda _n=name: _n),
            "_default_value": default_value,
        }
        subclass = type(f"{name}_TOKEN", (cls,), namespace)  # type: ignore
        cls.classes_by_value[default_value] = subclass
        return subclass

    def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StaticStringToken"]:
        """Return the cached subclass for a ``(name, default_value)`` pair."""
        name, default_value = value
        return cls.__build_subclass(name, default_value)

    def __init__(self):
        # ``_default_value`` is set on the generated subclasses only.
        super().__init__(self._default_value)

    @property
    def serialize_conversion(self) -> Callable[[Any], str]:
        """Return ``str`` as the conversion applied when serializing."""
        return str


# explicitly define various kinds of string-based tokens for type hinting
# variable values
NAME = StringToken["NAME"]
STRING_CHARS = StringToken["STRING_CHARS"]
ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"]
BINARY_OP = StringToken["BINARY_OP"]
HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"]
HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"]
NL_OR_COMMENT = StringToken["NL_OR_COMMENT"]
# static values
EQ = StaticStringToken[("EQ", "=")]
COLON = StaticStringToken[("COLON", ":")]
LPAR = StaticStringToken[("LPAR", "(")]
RPAR = StaticStringToken[("RPAR", ")")]
LBRACE = StaticStringToken[("LBRACE", "{")]
RBRACE = StaticStringToken[("RBRACE", "}")]
DOT = StaticStringToken[("DOT", ".")]
COMMA = StaticStringToken[("COMMA", ",")]
ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")]
QMARK = StaticStringToken[("QMARK", "?")]
LSQB = StaticStringToken[("LSQB", "[")]
RSQB = StaticStringToken[("RSQB", "]")]
INTERP_START = StaticStringToken[("INTERP_START", "${")]
DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')]
ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")]
FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")]
FOR = StaticStringToken[("FOR", "for")]
IN = StaticStringToken[("IN", "in")]
IF = StaticStringToken[("IF", "if")]
FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")]


class IntLiteral(LarkToken):
    """Token class for ``INT_LITERAL``."""

    @staticmethod
    def lark_name() -> str:
        """Return the terminal name this class represents."""
        return "INT_LITERAL"

    @property
    def serialize_conversion(self) -> Callable:
        """Return ``int`` as the conversion applied when serializing."""
        return int


class FloatLiteral(LarkToken):
    """Token class for ``FLOAT_LITERAL``."""

    @staticmethod
    def lark_name() -> str:
        """Return the terminal name this class represents."""
        return "FLOAT_LITERAL"

    @property
    def serialize_conversion(self) -> Callable:
        """Return ``float`` as the conversion applied when serializing."""
        return float
class NewLineOrCommentRule(TokenRule):
    """Token rule for ``new_line_or_comment``."""

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name this class represents."""
        return "new_line_or_comment"

    @classmethod
    def from_string(cls, string: str) -> "NewLineOrCommentRule":
        """Build a rule wrapping a single ``NL_OR_COMMENT`` token for ``string``."""
        # Imported locally so this module's top-level imports stay unchanged.
        from hcl2.rules.tokens import NL_OR_COMMENT

        # Token classes are constructed from the value alone; the token name
        # comes from the class, not from a constructor argument.
        return cls([NL_OR_COMMENT(string)])

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Return the wrapped token's own serialization."""
        return self.token.serialize()

    def to_list(
        self, options: SerializationOptions = SerializationOptions()
    ) -> Optional[List[str]]:
        """Return the comment text of each line, or ``None`` for a bare newline.

        Each line is stripped, loses a leading ``//``, ``/*`` or ``#`` and a
        trailing ``*/``, and is kept when something other than the empty
        string remains.
        """
        raw = self.serialize(options)
        if raw == "\n":
            return None

        result = []
        for line in raw.split("\n"):
            text = line.strip()

            for delimiter in ("//", "/*", "#"):
                if text.startswith(delimiter):
                    text = text[len(delimiter) :]

            if text.endswith("*/"):
                text = text[:-2]

            if text != "":
                result.append(text.strip())

        return result


class InlineCommentMixIn(LarkRule, ABC):
    """Mixin for rules whose children may include newline/comment nodes."""

    def _insert_optionals(
        self, children: List, indexes: Optional[List[int]] = None
    ):
        """Insert ``None`` into ``children`` wherever an optional slot is empty.

        ``children`` is modified in place. For every position in
        ``indexes``, ``None`` is inserted when the list is too short or the
        element there is not a ``NewLineOrCommentRule``.

        :param children: child list to pad.
        :param indexes: positions of the optional slots; ``None`` means there
            is nothing to pad.
        """
        if indexes is None:
            return

        for index in indexes:
            try:
                child = children[index]
            except IndexError:
                children.insert(index, None)
            else:
                if not isinstance(child, NewLineOrCommentRule):
                    children.insert(index, None)

    def inline_comments(self):
        """Return all comment strings found in this rule and nested mixins."""
        result = []
        for child in self._children:

            if isinstance(child, NewLineOrCommentRule):
                comments = child.to_list()
                if comments is not None:
                    result.extend(comments)

            elif isinstance(child, InlineCommentMixIn):
                # Recurse into children that can themselves hold comments.
                result.extend(child.inline_comments())

        return result
import ( + IndexExprTermRule, + SqbIndexRule, + ShortIndexRule, + GetAttrRule, + GetAttrExprTermRule, + AttrSplatExprTermRule, + AttrSplatRule, + FullSplatRule, + FullSplatExprTermRule, +) +from hcl2.rules.literal_rules import ( + FloatLitRule, + IntLitRule, + IdentifierRule, + BinaryOperatorRule, + KeywordRule, +) +from hcl2.rules.strings import ( + InterpolationRule, + StringRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + NAME, + IntLiteral, + FloatLiteral, + StringToken, + StaticStringToken, +) +from hcl2.rules.whitespace import NewLineOrCommentRule + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + with_meta: bool -HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) -HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + def transform(self, tree: Tree) -> StartRule: + return super().transform(tree) + def __init__(self, discard_new_line_or_comments: bool = False): + super().__init__() + self.discard_new_line_or_comments = discard_new_line_or_comments -START_LINE = "__start_line__" -END_LINE = "__end_line__" + def __default_token__(self, token: Token) -> StringToken: + # TODO make this return StaticStringToken where applicable + if token.value in StaticStringToken.classes_by_value.keys(): + return StaticStringToken.classes_by_value[token.value]() + return StringToken[token.type](token.value) + def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: + return FloatLiteral(token.value) -Attribute = namedtuple("Attribute", ("key", "value")) + def NAME(self, token: Token) -> NAME: + return NAME(token.value) + def INT_LITERAL(self, token: Token) -> IntLiteral: + return IntLiteral(token.value) -# pylint: disable=missing-function-docstring,unused-argument -class DictTransformer(Transformer): - """Takes a syntax tree generated by the parser and - 
transforms it to a dict. - """ + @v_args(meta=True) + def start(self, meta: Meta, args) -> StartRule: + return StartRule(args, meta) - with_meta: bool + @v_args(meta=True) + def body(self, meta: Meta, args) -> BodyRule: + return BodyRule(args, meta) - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} + @v_args(meta=True) + def block(self, meta: Meta, args) -> BlockRule: + return BlockRule(args, meta) - def __init__(self, with_meta: bool = False): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self.with_meta = with_meta - super().__init__() + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + # _attribute_name is flattened, so args[0] may be KeywordRule or IdentifierRule + if isinstance(args[0], KeywordRule): + args[0] = IdentifierRule([NAME(args[0].token.value)], meta) + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self.discard_new_line_or_comments: + return Discard + return NewLineOrCommentRule(args, meta) + + @v_args(meta=True) + def identifier(self, meta: Meta, args) -> IdentifierRule: + return IdentifierRule(args, meta) + + @v_args(meta=True) + def keyword(self, meta: Meta, args) -> KeywordRule: + return KeywordRule(args, meta) + + @v_args(meta=True) + def int_lit(self, meta: Meta, args) -> IntLitRule: + return IntLitRule(args, meta) - def float_lit(self, args: List) -> float: - value = "".join([self.to_tf_inline(arg) for arg in args]) - if "e" in value: - return self.to_string_dollar(value) - return float(value) - - def int_lit(self, args: List) -> int: - return int("".join([self.to_tf_inline(arg) for arg in args])) + @v_args(meta=True) + def float_lit(self, meta: Meta, args) -> FloatLitRule: + return FloatLitRule(args, meta) + + @v_args(meta=True) + def string(self, meta: Meta, args) -> 
StringRule: + return StringRule(args, meta) + + @v_args(meta=True) + def string_part(self, meta: Meta, args) -> StringPartRule: + return StringPartRule(args, meta) + + @v_args(meta=True) + def interpolation(self, meta: Meta, args) -> InterpolationRule: + return InterpolationRule(args, meta) + + @v_args(meta=True) + def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: + return HeredocTemplateRule(args, meta) + + @v_args(meta=True) + def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: + return HeredocTrimTemplateRule(args, meta) - def expr_term(self, args: List) -> Any: - args = self.strip_new_line_tokens(args) - - if args[0] == "true": - return True - if args[0] == "false": - return False - if args[0] == "null": - return None - - if args[0] == "(" and args[-1] == ")": - return "".join(str(arg) for arg in args) - - return args[0] - - def index_expr_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"{args[0]}{args[1]}" - - def index(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"[{args[0]}]" - - def get_attr_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def get_attr(self, args: List) -> str: - return f".{args[0]}" - - def attr_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def attr_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f".*{args_str}" - - def full_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def full_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f"[*]{args_str}" - - def tuple(self, args: List) -> List: - return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)] - - def object_elem(self, args: List) -> Dict: - # This returns a dict with a single key/value pair to make it easier to merge these - # into a bigger dict that is returned by the 
"object" function - - key = str(args[0].children[0]) - if not re.match(r".*?(\${).*}.*", key): - # do not strip quotes of a interpolation string - key = self.strip_quotes(key) - - value = self.to_string_dollar(args[2]) - return {key: value} - - def object_elem_key_dot_accessor(self, args: List) -> str: - return "".join(args) - - def object_elem_key_expression(self, args: List) -> str: - return self.to_string_dollar("".join(args)) - - def object(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - result: Dict[str, Any] = {} - for arg in args: - if ( - isinstance(arg, Token) and arg.type == "COMMA" - ): # skip optional comma at the end of object element - continue - - result.update(arg) - return result - - def function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 1: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[1] if arg is not Discard] - ) - return f"{args[0]}({args_str})" - - def provider_function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 5: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[5] if arg is not Discard] - ) - provider_func = "::".join([args[0], args[2], args[4]]) - return f"{provider_func}({args_str})" - - def arguments(self, args: List) -> List: - return self.process_nulls(args) - - @v_args(meta=True) - def block(self, meta: Meta, args: List) -> Dict: - *block_labels, block_body = args - result: Dict[str, Any] = block_body - if self.with_meta: - result.update( - { - START_LINE: meta.line, - END_LINE: meta.end_line, - } - ) - - # create nested dict. i.e. 
{label1: {label2: {labelN: result}}} - for label in reversed(block_labels): - label_str = self.strip_quotes(label) - result = {label_str: result} - - return result - - def attribute(self, args: List) -> Attribute: - key = str(args[0]) - if key.startswith('"') and key.endswith('"'): - key = key[1:-1] - value = self.to_string_dollar(args[2]) - return Attribute(key, value) - - def conditional(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return f"{args[0]} ? {args[1]} : {args[2]}" - - def binary_op(self, args: List) -> str: - return " ".join( - [self.unwrap_string_dollar(self.to_tf_inline(arg)) for arg in args] - ) - - def unary_op(self, args: List) -> str: - args = self.process_nulls(args) - return "".join([self.to_tf_inline(arg) for arg in args]) - - def binary_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def body(self, args: List) -> Dict[str, List]: - # See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies - # --- - # A body is a collection of associated attributes and blocks. - # - # An attribute definition assigns a value to a particular attribute - # name within a body. Each distinct attribute name may be defined no - # more than once within a single body. - # - # A block creates a child body that is annotated with a block type and - # zero or more block labels. Blocks create a structural hierarchy which - # can be interpreted by the calling application. - # --- - # - # There can be more than one child body with the same block type and - # labels. This means that all blocks (even when there is only one) - # should be transformed into lists of blocks. 
- args = self.strip_new_line_tokens(args) - attributes = set() - result: Dict[str, Any] = {} - for arg in args: - if isinstance(arg, Attribute): - if arg.key in result: - raise RuntimeError(f"{arg.key} already defined") - result[arg.key] = arg.value - attributes.add(arg.key) - else: - # This is a block. - for key, value in arg.items(): - key = str(key) - if key in result: - if key in attributes: - raise RuntimeError(f"{key} already defined") - result[key].append(value) - else: - result[key] = [value] - - return result - - def start(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - return args[0] - - def binary_operator(self, args: List) -> str: - return str(args[0]) - - def heredoc_template(self, args: List) -> str: - match = HEREDOC_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - result = match.group(2).rstrip(trim_chars) - return f'"{result}"' - - def heredoc_template_trim(self, args: List) -> str: - # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions - # This is a special version of heredocs that are declared with "<<-" - # This will calculate the minimum number of leading spaces in each line of a heredoc - # and then remove that number of spaces from each line - match = HEREDOC_TRIM_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - text = match.group(2).rstrip(trim_chars) - lines = text.split("\n") - - # calculate the min number of leading spaces in each line - min_spaces = sys.maxsize - for line in lines: - leading_spaces = len(line) - len(line.lstrip(" ")) - min_spaces = min(min_spaces, leading_spaces) - - # trim off that number of leading spaces from each line - lines = [line[min_spaces:] for line in lines] - - return '"%s"' % "\n".join(lines) - - def new_line_or_comment(self, args: List) -> _DiscardType: - return Discard - - def 
for_tuple_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - return f"[{for_expr}]" - - def for_intro(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_cond(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_object_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - # doubled curly braces stands for inlining the braces - # and the third pair of braces is for the interpolation - # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" - return f"{{{for_expr}}}" - - def string(self, args: List) -> str: - return '"' + "".join(args) + '"' - - def string_part(self, args: List) -> str: - value = self.to_tf_inline(args[0]) - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - def interpolation(self, args: List) -> str: - return '"${' + str(args[0]) + '}"' - - def strip_new_line_tokens(self, args: List) -> List: - """ - Remove new line and Discard tokens. 
- The parser will sometimes include these in the tree so we need to strip them out here - """ - return [arg for arg in args if arg != "\n" and arg is not Discard] - - def is_string_dollar(self, value: str) -> bool: - if not isinstance(value, str): - return False - return value.startswith("${") and value.endswith("}") - - def to_string_dollar(self, value: Any) -> Any: - """Wrap a string in ${ and }""" - if not isinstance(value, str): - return value - # if it's already wrapped, pass it unmodified - if self.is_string_dollar(value): - return value - - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - - if self.is_type_keyword(value): - return value - - return f"${{{value}}}" - - def unwrap_string_dollar(self, value: str): - if self.is_string_dollar(value): - return value[2:-1] - return value - - def strip_quotes(self, value: Any) -> Any: - """Remove quote characters from the start and end of a string""" - if isinstance(value, str): - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - return value - - def process_escape_sequences(self, value: str) -> str: - """Process HCL escape sequences within quoted template expressions.""" - if isinstance(value, str): - # normal escape sequences - value = value.replace("\\n", "\n") - value = value.replace("\\r", "\r") - value = value.replace("\\t", "\t") - value = value.replace('\\"', '"') - value = value.replace("\\\\", "\\") - - # we will leave Unicode escapes (\uNNNN and \UNNNNNNNN) untouched - # for now, but this method can be extended in the future - return value - - def process_nulls(self, args: List) -> List: - return ["null" if arg is None else arg for arg in args] - - def to_tf_inline(self, value: Any) -> str: - """ - Converts complex objects (e.g.) 
dicts to an "inline" HCL syntax - for use in function calls and ${interpolation} strings - """ - if isinstance(value, dict): - dict_v = json.dumps(value) - return reverse_quotes_within_interpolation(dict_v) - if isinstance(value, list): - value = [self.to_tf_inline(item) for item in value] - return f"[{', '.join(value)}]" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, str): - return value - if isinstance(value, (int, float)): - return str(value) - if value is None: - return "None" - - raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}") - - def identifier(self, value: Any) -> Any: - # Making identifier a token by capitalizing it to IDENTIFIER - # seems to return a token object instead of the str - # So treat it like a regular rule - # In this case we just convert the whole thing to a string - return str(value[0]) + @v_args(meta=True) + def expr_term(self, meta: Meta, args) -> ExprTermRule: + return ExprTermRule(args, meta) + + @v_args(meta=True) + def conditional(self, meta: Meta, args) -> ConditionalRule: + return ConditionalRule(args, meta) + + @v_args(meta=True) + def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: + return BinaryOperatorRule(args, meta) + + @v_args(meta=True) + def binary_term(self, meta: Meta, args) -> BinaryTermRule: + return BinaryTermRule(args, meta) + + @v_args(meta=True) + def unary_op(self, meta: Meta, args) -> UnaryOpRule: + return UnaryOpRule(args, meta) + + @v_args(meta=True) + def binary_op(self, meta: Meta, args) -> BinaryOpRule: + return BinaryOpRule(args, meta) + + @v_args(meta=True) + def tuple(self, meta: Meta, args) -> TupleRule: + return TupleRule(args, meta) + + @v_args(meta=True) + def object(self, meta: Meta, args) -> ObjectRule: + return ObjectRule(args, meta) + + @v_args(meta=True) + def object_elem(self, meta: Meta, args) -> ObjectElemRule: + return ObjectElemRule(args, meta) + + @v_args(meta=True) + def object_elem_key(self, meta: Meta, 
args) -> ObjectElemKeyRule: + return ObjectElemKeyRule(args, meta) + + @v_args(meta=True) + def object_elem_key_expression( + self, meta: Meta, args + ) -> ObjectElemKeyExpressionRule: + return ObjectElemKeyExpressionRule(args, meta) + + @v_args(meta=True) + def object_elem_key_dot_accessor( + self, meta: Meta, args + ) -> ObjectElemKeyDotAccessor: + return ObjectElemKeyDotAccessor(args, meta) + + @v_args(meta=True) + def arguments(self, meta: Meta, args) -> ArgumentsRule: + return ArgumentsRule(args, meta) + + @v_args(meta=True) + def function_call(self, meta: Meta, args) -> FunctionCallRule: + return FunctionCallRule(args, meta) + + # @v_args(meta=True) + # def provider_function_call(self, meta: Meta, args) -> ProviderFunctionCallRule: + # return ProviderFunctionCallRule(args, meta) + + @v_args(meta=True) + def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: + return IndexExprTermRule(args, meta) + + @v_args(meta=True) + def braces_index(self, meta: Meta, args) -> SqbIndexRule: + return SqbIndexRule(args, meta) + + @v_args(meta=True) + def short_index(self, meta: Meta, args) -> ShortIndexRule: + return ShortIndexRule(args, meta) + + @v_args(meta=True) + def get_attr(self, meta: Meta, args) -> GetAttrRule: + return GetAttrRule(args, meta) + + @v_args(meta=True) + def get_attr_expr_term(self, meta: Meta, args) -> GetAttrExprTermRule: + return GetAttrExprTermRule(args, meta) + + @v_args(meta=True) + def attr_splat(self, meta: Meta, args) -> AttrSplatRule: + return AttrSplatRule(args, meta) + + @v_args(meta=True) + def attr_splat_expr_term(self, meta: Meta, args) -> AttrSplatExprTermRule: + return AttrSplatExprTermRule(args, meta) + + @v_args(meta=True) + def full_splat(self, meta: Meta, args) -> FullSplatRule: + return FullSplatRule(args, meta) + + @v_args(meta=True) + def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: + return FullSplatExprTermRule(args, meta) + + @v_args(meta=True) + def for_tuple_expr(self, meta: Meta, args) 
-> ForTupleExprRule: + return ForTupleExprRule(args, meta) + + @v_args(meta=True) + def for_object_expr(self, meta: Meta, args) -> ForObjectExprRule: + return ForObjectExprRule(args, meta) + + @v_args(meta=True) + def for_intro(self, meta: Meta, args) -> ForIntroRule: + return ForIntroRule(args, meta) + + @v_args(meta=True) + def for_cond(self, meta: Meta, args) -> ForCondRule: + return ForCondRule(args, meta) diff --git a/hcl2/utils.py b/hcl2/utils.py new file mode 100644 index 00000000..68c32ebc --- /dev/null +++ b/hcl2/utils.py @@ -0,0 +1,72 @@ +import re +from contextlib import contextmanager +from dataclasses import dataclass, replace +from typing import Generator + +HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) +HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + + +@dataclass +class SerializationOptions: + with_comments: bool = True + with_meta: bool = False + wrap_objects: bool = False + wrap_tuples: bool = False + explicit_blocks: bool = True + preserve_heredocs: bool = True + force_operation_parentheses: bool = False + + +@dataclass +class SerializationContext: + inside_dollar_string: bool = False + inside_parentheses: bool = False + + def replace(self, **kwargs) -> "SerializationContext": + return replace(self, **kwargs) + + @contextmanager + def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: + """Context manager that yields a modified copy of the context""" + modified_context = self.replace(**kwargs) + yield modified_context + + @contextmanager + def modify(self, **kwargs): + original_values = {key: getattr(self, key) for key in kwargs} + + for key, value in kwargs.items(): + setattr(self, key, value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + setattr(self, key, value) + + +def is_dollar_string(value: str) -> bool: + if not isinstance(value, str): + return False + return value.startswith("${") and 
value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + return f"({value})" diff --git a/test/helpers/__init__.py b/test/helpers/__init__.py deleted file mode 100644 index ba33e308..00000000 --- a/test/helpers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Helper functions for tests -""" diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py deleted file mode 100644 index 5acee1e7..00000000 --- a/test/helpers/hcl2_helper.py +++ /dev/null @@ -1,21 +0,0 @@ -# pylint:disable=C0114,C0115,C0116 - -from lark import Tree - -from hcl2.parser import parser -from hcl2.transformer import DictTransformer - - -class Hcl2Helper: - @classmethod - def load(cls, syntax: str) -> Tree: - return parser().parse(syntax) - - @classmethod - def load_to_dict(cls, syntax) -> dict: - tree = cls.load(syntax) - return DictTransformer().transform(tree) - - @classmethod - def build_argument(cls, identifier: str, expression: str = '"expression"') -> str: - return f"{identifier} = {expression}" diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/helpers/terraform-config/test_floats.tf b/test/integration/hcl2_original/floats.tf similarity index 100% rename from test/helpers/terraform-config/test_floats.tf rename to test/integration/hcl2_original/floats.tf diff --git a/test/helpers/terraform-config/nulls.tf b/test/integration/hcl2_original/nulls.tf similarity index 100% rename from test/helpers/terraform-config/nulls.tf rename to test/integration/hcl2_original/nulls.tf diff --git a/test/integration/hcl2_original/object_keys.tf 
b/test/integration/hcl2_original/object_keys.tf new file mode 100644 index 00000000..913d5a42 --- /dev/null +++ b/test/integration/hcl2_original/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0: 0, + "foo": 1 + baz : 2, + (var.account) : 3 + (format("key_prefix_%s", local.foo)) : 4 + "prefix_${var.account}:${var.user}_suffix": 5, +} diff --git a/test/integration/hcl2_original/operators.tf b/test/integration/hcl2_original/operators.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/integration/hcl2_original/operators.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/integration/hcl2_original/resource_keyword_attribute.tf b/test/integration/hcl2_original/resource_keyword_attribute.tf new file mode 100644 index 00000000..fca27d75 --- /dev/null +++ b/test/integration/hcl2_original/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource "custom_provider_resource" "resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf new file mode 100644 index 00000000..99537532 --- /dev/null +++ b/test/integration/hcl2_original/smoke.tf @@ -0,0 +1,61 @@ + +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (- a) + e = ( + a == b + ? 
true : false + ) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10 + , + "k3" = {k4 = "a"} + (5 + 5) = "d" + k5.attr.attr = "e" + } + i = [ + a, b + , + "c${aaa}", + d, + [1, 2, 3,], + f(a), + provider::func::aa(5) + + ] + j = func( + a, b + , c, + d ... + + ) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[ + forwarding_rule_key + ].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + ] + } + ... + if + substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/integration/hcl2_original/string_interpolations.tf similarity index 68% rename from test/helpers/terraform-config/string_interpolations.tf rename to test/integration/hcl2_original/string_interpolations.tf index 582b4aac..f9ac4e18 100644 --- a/test/helpers/terraform-config/string_interpolations.tf +++ b/test/integration/hcl2_original/string_interpolations.tf @@ -1,6 +1,6 @@ -locals { - simple_interpolation = "prefix:${var.foo}-suffix" - embedded_interpolation = "(long substring without interpolation); ${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo" +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" escaped_interpolation = "prefix:$${aws:username}-suffix" simple_and_escaped = "${"bar"}$${baz:bat}" diff --git a/test/helpers/terraform-config/unicode_strings.tf b/test/integration/hcl2_original/unicode_strings.tf similarity index 100% rename from 
test/helpers/terraform-config/unicode_strings.tf rename to test/integration/hcl2_original/unicode_strings.tf diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf new file mode 100644 index 00000000..810108b2 --- /dev/null +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -0,0 +1,26 @@ +locals { + simple_float = 123.456 + small_float = 0.123 + large_float = 9876543.21 + negative_float = -42.5 + negative_small = -0.001 + scientific_positive = 123000.0 + scientific_negative = 0.00987 + scientific_large = 6.022e+23 + integer_as_float = 100.0 + float_calculation = 10500.0 * 3.0 / 2.1 + float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_list = [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0, + ] + float_object = { + pi = 3.14159, + euler = 2.71828, + sqrt2 = 1.41421, + scientific = -12300.0, + } +} diff --git a/test/integration/hcl2_reconstructed/nulls.tf b/test/integration/hcl2_reconstructed/nulls.tf new file mode 100644 index 00000000..1e487789 --- /dev/null +++ b/test/integration/hcl2_reconstructed/nulls.tf @@ -0,0 +1,11 @@ +terraform = { + unary = !null, + binary = (a == null), + tuple = [ + null, + 1, + 2, + ], + single = null, + conditional = null ? 
null : null, +} diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf new file mode 100644 index 00000000..497e65a6 --- /dev/null +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0 = 0, + "foo" = 1, + baz = 2, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, + "prefix_${var.account}:${var.user}_suffix" = 5, +} diff --git a/test/integration/hcl2_reconstructed/operators.tf b/test/integration/hcl2_reconstructed/operators.tf new file mode 100644 index 00000000..323759aa --- /dev/null +++ b/test/integration/hcl2_reconstructed/operators.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf new file mode 100644 index 00000000..498777e0 --- /dev/null +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource"custom_provider_resource""resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf new file mode 100644 index 00000000..b2de26f3 --- /dev/null +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -0,0 +1,53 @@ +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (-a) + e = (a == b ? 
true : false) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10, + "k3" = { + k4 = "a", + }, + (5 + 5) = "d", + k5.attr.attr = "e", + } + i = [ + a, + b, + "c${aaa}", + d, + [ + 1, + 2, + 3, + ], + f(a), + provider::func::aa(), + ] + j = func(a, b, c, d) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + + ] + } ... if substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/integration/hcl2_reconstructed/string_interpolations.tf b/test/integration/hcl2_reconstructed/string_interpolations.tf new file mode 100644 index 00000000..73df4715 --- /dev/null +++ b/test/integration/hcl2_reconstructed/string_interpolations.tf @@ -0,0 +1,9 @@ +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} diff --git a/test/integration/hcl2_reconstructed/unicode_strings.tf b/test/integration/hcl2_reconstructed/unicode_strings.tf new file mode 100644 index 00000000..8c4df70e --- /dev/null +++ b/test/integration/hcl2_reconstructed/unicode_strings.tf @@ -0,0 +1,21 @@ +locals { + basic_unicode = "Hello, 世界! こんにちは Привет नमस्ते" + unicode_escapes = "© ♥ ♪ ☠ ☺" + emoji_string = "🚀 🌍 🔥 🎉" + rtl_text = "English and العربية text mixed" + complex_unicode = "Python (파이썬) es 很棒的! 
♥ αβγδ" + ascii = "ASCII: abc123" + emoji = "Emoji: 🚀🌍🔥🎉" + math = "Math: ∑∫√∞≠≤≥" + currency = "Currency: £€¥₹₽₩" + arrows = "Arrows: ←↑→↓↔↕" + cjk = "CJK: 你好世界안녕하세요こんにちは" + cyrillic = "Cyrillic: Привет мир" + special = "Special: ©®™§¶†‡" + mixed_content = <<-EOT + Line with interpolation: ${var.name} + Line with emoji: 👨‍👩‍👧‍👦 + Line with quotes: "quoted text" + Line with backslash: \escaped + EOT +} diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_reserialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/nulls.json b/test/integration/json_reserialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_reserialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? 
null : null}" + } +} diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_reserialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_reserialized/operators.json b/test/integration/json_reserialized/operators.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/integration/json_reserialized/operators.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? 
d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/resource_keyword_attribute.json b/test/integration/json_reserialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_reserialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json new file mode 100644 index 00000000..670c5be3 --- /dev/null +++ b/test/integration/json_reserialized/smoke.json @@ -0,0 +1,56 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_reserialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_reserialized/unicode_strings.json b/test/integration/json_reserialized/unicode_strings.json new file mode 
100644 index 00000000..5f8f0095 --- /dev/null +++ b/test/integration/json_reserialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! \u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_serialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + 
"float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/nulls.json b/test/integration/json_serialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_serialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? null : null}" + } +} diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_serialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_serialized/operators.json b/test/integration/json_serialized/operators.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/integration/json_serialized/operators.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? 
d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/resource_keyword_attribute.json b/test/integration/json_serialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_serialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json new file mode 100644 index 00000000..670c5be3 --- /dev/null +++ b/test/integration/json_serialized/smoke.json @@ -0,0 +1,56 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_serialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_serialized/unicode_strings.json b/test/integration/json_serialized/unicode_strings.json new file mode 100644 index 
00000000..5f8f0095 --- /dev/null +++ b/test/integration/json_serialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! \u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/builder_basic.json b/test/integration/specialized/builder_basic.json new file mode 100644 index 00000000..da62720b --- /dev/null +++ b/test/integration/specialized/builder_basic.json @@ -0,0 +1,63 @@ +{ + "__is_block__": true, + "resource": [ + { + "aws_instance": { + "web": { + "__is_block__": true, + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2 + } + } + }, + { + "aws_s3_bucket": { + "data": { + "__is_block__": true, + "bucket": "\"my-bucket\"", + "acl": 
"\"private\"" + } + } + }, + { + "aws_instance": { + "nested": { + "__is_block__": true, + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "__is_block__": true, + "command": "\"echo hello\"" + } + }, + { + "remote-exec": { + "__is_block__": true, + "inline": "[\"puppet apply\"]" + } + } + ] + } + } + } + ], + "variable": [ + { + "instance_type": { + "__is_block__": true, + "default": "\"t2.micro\"", + "description": "\"The instance type\"" + } + } + ], + "locals": [ + { + "__is_block__": true, + "port": 8080, + "enabled": true, + "name": "\"my-app\"" + } + ] +} diff --git a/test/integration/specialized/builder_basic.tf b/test/integration/specialized/builder_basic.tf new file mode 100644 index 00000000..b7ee2131 --- /dev/null +++ b/test/integration/specialized/builder_basic.tf @@ -0,0 +1,38 @@ +resource aws_instance web { + ami = "ami-12345" + instance_type = "t2.micro" + count = 2 +} + + +resource aws_s3_bucket data { + bucket = "my-bucket" + acl = "private" +} + + +resource aws_instance nested { + ami = "ami-99999" + + provisioner local-exec { + command = "echo hello" + } + + + provisioner remote-exec { + inline = ["puppet apply"] + } +} + + +variable instance_type { + default = "t2.micro" + description = "The instance type" +} + + +locals { + port = 8080 + enabled = true + name = "my-app" +} diff --git a/test/integration/specialized/builder_basic_reparsed.json b/test/integration/specialized/builder_basic_reparsed.json new file mode 100644 index 00000000..32e4954d --- /dev/null +++ b/test/integration/specialized/builder_basic_reparsed.json @@ -0,0 +1,64 @@ +{ + "resource": [ + { + "aws_instance": { + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { 
+ "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": [ + "\"puppet apply\"" + ], + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/builder_basic_reserialized.json b/test/integration/specialized/builder_basic_reserialized.json new file mode 100644 index 00000000..364ef0c3 --- /dev/null +++ b/test/integration/specialized/builder_basic_reserialized.json @@ -0,0 +1,62 @@ +{ + "resource": [ + { + "aws_instance": { + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": "[\"puppet apply\"]", + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/operator_precedence.json b/test/integration/specialized/operator_precedence.json new file mode 100644 index 00000000..35adb5bb --- /dev/null +++ b/test/integration/specialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": 
"${(var.env == \"prod\") && var.debug}", + "and_before_ternary": "${(true && true) ? 1 : 0}", + "mixed_arith_cmp": "${(var.a + (var.b * var.c)) > 10}", + "full_chain": "${(((a + b) == c) && d) || e}", + "left_assoc_sub": "${(a - b) - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${(!a) && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/operator_precedence.tf b/test/integration/specialized/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/integration/specialized/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py new file mode 100644 index 00000000..3d2bbbb0 --- /dev/null +++ b/test/integration/test_round_trip.py @@ -0,0 +1,191 @@ +"""Round-trip tests for the HCL2 → JSON → HCL2 pipeline. + +Every test starts from the source HCL files in test/integration/hcl2_original/ +and runs the pipeline forward from there, comparing actuals against expected +outputs at each stage: + +1. HCL → JSON serialization (parse + transform + serialize) +2. JSON → JSON reserialization (serialize + deserialize + reserialize) +3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) +4. 
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) +""" + +import json +from enum import Enum +from pathlib import Path +from typing import List +from unittest import TestCase + +from hcl2.api import parses_to_tree +from hcl2.deserializer import BaseDeserializer +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer + +INTEGRATION_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = INTEGRATION_DIR / "hcl2_original" + +_STEP_DIRS = { + "hcl2_original": HCL2_ORIGINAL_DIR, + "hcl2_reconstructed": INTEGRATION_DIR / "hcl2_reconstructed", + "json_serialized": INTEGRATION_DIR / "json_serialized", + "json_reserialized": INTEGRATION_DIR / "json_reserialized", +} + +_STEP_SUFFIXES = { + "hcl2_original": ".tf", + "hcl2_reconstructed": ".tf", + "json_serialized": ".json", + "json_reserialized": ".json", +} + + +class SuiteStep(Enum): + ORIGINAL = "hcl2_original" + RECONSTRUCTED = "hcl2_reconstructed" + JSON_SERIALIZED = "json_serialized" + JSON_RESERIALIZED = "json_reserialized" + + +def _get_suites() -> List[str]: + """ + Get a list of the test suites. + The name of a test suite is the name of a file in `test/integration/hcl2_original/` without the .tf suffix. + + Override SUITES to run a specific subset, e.g. SUITES = ["config"] + """ + return SUITES or sorted( + file.stem for file in HCL2_ORIGINAL_DIR.iterdir() if file.is_file() + ) + + +# set this to an arbitrary list of test suites to run, +# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/integration/hcl2_original/smoke.tf` +SUITES: List[str] = [] + + +def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: + """Return the path for a given suite name and pipeline step.""" + return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) + + +def _parse_and_serialize(hcl_text: str, options=None) -> dict: + """Parse HCL text and serialize to a Python dict.""" + parsed_tree = parses_to_tree(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + if options: + return rules.serialize(options=options) + return rules.serialize() + + +def _deserialize_and_reserialize(serialized: dict) -> dict: + """Deserialize a Python dict back through the rule tree and reserialize.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + return deserialized.serialize() + + +def _deserialize_and_reconstruct(serialized: dict) -> str: + """Deserialize a Python dict and reconstruct HCL text.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestRoundTripSerialization(TestCase): + """Test HCL2 → JSON serialization: parse HCL, transform, serialize, compare with expected JSON.""" + + maxDiff = None + + def test_hcl_to_json(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) + + self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for {suite}", + ) + + +class TestRoundTripReserialization(TestCase): + 
"""Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + + maxDiff = None + + def test_json_reserialization(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) + + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) + + +class TestRoundTripReconstruction(TestCase): + """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + + maxDiff = None + + def test_json_to_hcl(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) + + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) + + +class TestRoundTripFull(TestCase): + """Test full round-trip: HCL → JSON → HCL → JSON should produce matching JSON.""" + + maxDiff = None + + def test_full_round_trip(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) + + # Reconstruct: JSON → HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) + + # Reparse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) + + self.assertEqual( + reserialized, + serialized, + f"Full round-trip 
mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py new file mode 100644 index 00000000..d1b817e2 --- /dev/null +++ b/test/integration/test_specialized.py @@ -0,0 +1,77 @@ +"""Specialized integration tests for specific features and scenarios. + +Unlike the suite-based round-trip tests, these target individual features +(operator precedence, Builder round-trip) with dedicated golden files +in test/integration/specialized/. +""" + +import json +from pathlib import Path +from unittest import TestCase + +from hcl2.utils import SerializationOptions + +from test.integration.test_round_trip import ( + _parse_and_serialize, + _deserialize_and_reserialize, + _deserialize_and_reconstruct, +) + +SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = SPECIAL_DIR / "operator_precedence.tf" + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + +class TestBuilderRoundTrip(TestCase): + """Test that dicts produced by Builder can be deserialized, reconstructed to + valid HCL, and reparsed back to equivalent dicts. 
+ + Pipeline: Builder.build() → from_dict → reconstruct → HCL text + HCL text → parse → serialize → dict (compare with expected) + """ + + maxDiff = None + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_builder_reconstruction(self): + """Builder dict → deserialize → reconstruct → compare with expected HCL.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + actual_hcl = _deserialize_and_reconstruct(builder_dict) + expected_hcl = self._load_special("builder_basic", ".tf") + self.assertMultiLineEqual(actual_hcl, expected_hcl) + + def test_builder_full_round_trip(self): + """Builder dict → reconstruct → reparse → compare with expected JSON.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reconstructed_hcl = _deserialize_and_reconstruct(builder_dict) + actual = _parse_and_serialize(reconstructed_hcl) + expected = json.loads(self._load_special("builder_basic_reparsed", ".json")) + self.assertEqual(actual, expected) + + def test_builder_reserialization(self): + """Builder dict → deserialize → reserialize → compare with expected dict.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reserialized = _deserialize_and_reserialize(builder_dict) + expected = json.loads(self._load_special("builder_basic_reserialized", ".json")) + self.assertEqual(reserialized, expected) diff --git a/test/unit/__init__.py b/test/unit/__init__.py index c497b297..e69de29b 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1 +0,0 @@ -"""Unit tests -- tests that verify the code of this egg in isolation""" diff --git a/test/unit/rules/__init__.py b/test/unit/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py new file mode 100644 index 00000000..8803effc --- /dev/null +++ b/test/unit/rules/test_abstract.py @@ -0,0 +1,178 @@ +from unittest import 
TestCase + +from lark import Token, Tree +from lark.tree import Meta + +from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Concrete stubs for testing ABCs --- + + +class ConcreteToken(LarkToken): + @staticmethod + def lark_name() -> str: + return "TEST_TOKEN" + + @property + def serialize_conversion(self): + return str + + +class IntToken(LarkToken): + @staticmethod + def lark_name() -> str: + return "INT_TOKEN" + + @property + def serialize_conversion(self): + return int + + +class ConcreteRule(LarkRule): + @staticmethod + def lark_name() -> str: + return "test_rule" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +# --- Tests --- + + +class TestLarkToken(TestCase): + def test_init_stores_value(self): + token = ConcreteToken("hello") + self.assertEqual(token.value, "hello") + + def test_value_property(self): + token = ConcreteToken(42) + self.assertEqual(token.value, 42) + + def test_set_value(self): + token = ConcreteToken("old") + token.set_value("new") + self.assertEqual(token.value, "new") + + def test_str(self): + token = ConcreteToken("hello") + self.assertEqual(str(token), "hello") + + def test_str_numeric(self): + token = ConcreteToken(42) + self.assertEqual(str(token), "42") + + def test_repr(self): + token = ConcreteToken("hello") + self.assertEqual(repr(token), "") + + def test_to_lark_returns_token(self): + token = ConcreteToken("val") + lark_token = token.to_lark() + self.assertIsInstance(lark_token, Token) + self.assertEqual(lark_token.type, "TEST_TOKEN") + self.assertEqual(lark_token, "val") + + def test_serialize_uses_conversion(self): + token = ConcreteToken("hello") + self.assertEqual(token.serialize(), "hello") + + def test_serialize_int_conversion(self): + token = IntToken("42") + result = token.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + def 
test_lark_name(self): + self.assertEqual(ConcreteToken.lark_name(), "TEST_TOKEN") + + +class TestLarkRule(TestCase): + def test_init_sets_children(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertEqual(rule.children, [t1, t2]) + + def test_init_sets_parent_and_index(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertIs(t1._parent, rule) + self.assertIs(t2._parent, rule) + self.assertEqual(t1._index, 0) + self.assertEqual(t2._index, 1) + + def test_init_skips_none_children_for_parent_index(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + self.assertIs(t1._parent, rule) + self.assertEqual(t1._index, 1) + + def test_init_with_meta(self): + meta = Meta() + rule = ConcreteRule([], meta) + self.assertIs(rule._meta, meta) + + def test_init_without_meta(self): + rule = ConcreteRule([]) + self.assertIsNotNone(rule._meta) + + def test_parent_property(self): + child_rule = ConcreteRule([]) + parent_rule = ConcreteRule([child_rule]) + self.assertIs(child_rule.parent, parent_rule) + + def test_index_property(self): + child_rule = ConcreteRule([]) + ConcreteRule([child_rule]) + self.assertEqual(child_rule.index, 0) + + def test_children_property(self): + t = ConcreteToken("x") + rule = ConcreteRule([t]) + self.assertEqual(rule.children, [t]) + + def test_to_lark_builds_tree(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + tree = rule.to_lark() + self.assertIsInstance(tree, Tree) + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 2) + + def test_to_lark_skips_none_children(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + tree = rule.to_lark() + self.assertEqual(len(tree.children), 1) + self.assertEqual(tree.children[0], "a") + + def test_repr(self): + rule = ConcreteRule([]) + self.assertEqual(repr(rule), "") + + def test_nested_rules(self): + inner 
= ConcreteRule([ConcreteToken("x")]) + outer = ConcreteRule([inner]) + self.assertIs(inner.parent, outer) + tree = outer.to_lark() + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 1) + self.assertIsInstance(tree.children[0], Tree) + + +class TestLarkElement(TestCase): + def test_set_index(self): + token = ConcreteToken("x") + token.set_index(5) + self.assertEqual(token._index, 5) + + def test_set_parent(self): + token = ConcreteToken("x") + parent = ConcreteRule([]) + token.set_parent(parent) + self.assertIs(token._parent, parent) diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py new file mode 100644 index 00000000..5ae28df4 --- /dev/null +++ b/test/unit/rules/test_containers.py @@ -0,0 +1,396 @@ +from unittest import TestCase + +from hcl2.rules.containers import ( + TupleRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemRule, + ObjectRule, +) +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + LPAR, + RPAR, + DOT, + EQ, + COLON, + COMMA, + NAME, + DBLQUOTE, + STRING_CHARS, + IntLiteral, + FloatLiteral, +) +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & Helpers --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_identifier(name): + return 
IdentifierRule([NAME(name)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_object_elem_key(identifier_name): + return ObjectElemKeyRule([_make_identifier(identifier_name)]) + + +def _make_object_elem(key_name, expr_value, sep=None): + key = _make_object_elem_key(key_name) + separator = sep or EQ() + return ObjectElemRule([key, separator, StubExpression(expr_value)]) + + +# --- TupleRule tests --- + + +class TestTupleRule(TestCase): + def test_lark_name(self): + self.assertEqual(TupleRule.lark_name(), "tuple") + + def test_elements_empty_tuple(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + expr = StubExpression(1) + rule = TupleRule([LSQB(), expr, RSQB()]) + self.assertEqual(rule.elements, [expr]) + + def test_elements_multiple(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + e3 = StubExpression(3) + rule = TupleRule([LSQB(), e1, COMMA(), e2, COMMA(), e3, RSQB()]) + self.assertEqual(rule.elements, [e1, e2, e3]) + + def test_elements_skips_non_expressions(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + nlc = _make_nlc("\n") + rule = TupleRule([LSQB(), nlc, e1, COMMA(), nlc, e2, RSQB()]) + self.assertEqual(len(rule.elements), 2) + + def test_serialize_default_returns_list(self): + rule = TupleRule( + [LSQB(), StubExpression(1), COMMA(), StubExpression(2), RSQB()] + ) + result = rule.serialize() + self.assertEqual(result, [1, 2]) + + def test_serialize_empty_returns_empty_list(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.serialize(), []) + + def test_serialize_single_element(self): + rule = TupleRule([LSQB(), StubExpression(42), RSQB()]) + self.assertEqual(rule.serialize(), [42]) + + def test_serialize_wrap_tuples(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + opts = 
SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[a, b]}") + + def test_serialize_wrap_tuples_empty(self): + rule = TupleRule([LSQB(), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[]}") + + def test_serialize_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("a"), RSQB()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string forces string representation + self.assertEqual(result, "[a]") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "[a, b]") + + def test_serialize_wrap_tuples_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("x"), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + # Already inside $, so no extra wrapping + self.assertEqual(result, "[x]") + + +# --- ObjectElemKeyRule tests --- + + +class TestObjectElemKeyRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemKeyRule.lark_name(), "object_elem_key") + + def test_value_property_identifier(self): + ident = _make_identifier("foo") + rule = ObjectElemKeyRule([ident]) + self.assertIs(rule.value, ident) + + def test_serialize_identifier(self): + rule = ObjectElemKeyRule([_make_identifier("my_key")]) + self.assertEqual(rule.serialize(), "my_key") + + def test_serialize_int_lit(self): + rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) + self.assertEqual(rule.serialize(), "5") + + def test_serialize_float_lit(self): + rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) + 
self.assertEqual(rule.serialize(), "3.14") + + def test_serialize_string(self): + rule = ObjectElemKeyRule([_make_string_rule("k3")]) + self.assertEqual(rule.serialize(), '"k3"') + + +# --- ObjectElemKeyExpressionRule tests --- + + +class TestObjectElemKeyExpressionRule(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" + ) + + def test_expression_property(self): + expr = StubExpression("5 + 5") + rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) + self.assertIs(rule.expression, expr) + + def test_serialize(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + result = rule.serialize() + self.assertEqual(result, "${(5 + 5)}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "(5 + 5)") + + +# --- ObjectElemKeyDotAccessor tests --- + + +class TestObjectElemKeyDotAccessor(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" + ) + + def test_identifiers_property(self): + i1 = _make_identifier("k5") + i2 = _make_identifier("attr") + i3 = _make_identifier("sub") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) + idents = rule.identifiers + self.assertEqual(len(idents), 3) + self.assertIs(idents[0], i1) + self.assertIs(idents[1], i2) + self.assertIs(idents[2], i3) + + def test_identifiers_two_segments(self): + i1 = _make_identifier("a") + i2 = _make_identifier("b") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) + self.assertEqual(len(rule.identifiers), 2) + + def test_serialize(self): + rule = ObjectElemKeyDotAccessor( + [ + _make_identifier("k5"), + DOT(), + _make_identifier("attr"), + DOT(), + _make_identifier("sub"), + ] + ) + 
self.assertEqual(rule.serialize(), "k5.attr.sub") + + def test_serialize_two_segments(self): + rule = ObjectElemKeyDotAccessor( + [_make_identifier("a"), DOT(), _make_identifier("b")] + ) + self.assertEqual(rule.serialize(), "a.b") + + +# --- ObjectElemRule tests --- + + +class TestObjectElemRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemRule.lark_name(), "object_elem") + + def test_key_property(self): + key = _make_object_elem_key("foo") + rule = ObjectElemRule([key, EQ(), StubExpression("bar")]) + self.assertIs(rule.key, key) + + def test_expression_property(self): + expr = StubExpression("bar") + rule = ObjectElemRule([_make_object_elem_key("foo"), EQ(), expr]) + self.assertIs(rule.expression, expr) + + def test_serialize_with_eq(self): + rule = _make_object_elem("name", "value") + self.assertEqual(rule.serialize(), {"name": "value"}) + + def test_serialize_with_colon(self): + rule = ObjectElemRule([_make_object_elem_key("k"), COLON(), StubExpression(42)]) + self.assertEqual(rule.serialize(), {"k": 42}) + + def test_serialize_int_value(self): + rule = _make_object_elem("count", 5) + self.assertEqual(rule.serialize(), {"count": 5}) + + def test_serialize_string_key(self): + key = ObjectElemKeyRule([_make_string_rule("quoted")]) + rule = ObjectElemRule([key, EQ(), StubExpression("val")]) + self.assertEqual(rule.serialize(), {'"quoted"': "val"}) + + +# --- ObjectRule tests --- + + +class TestObjectRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectRule.lark_name(), "object") + + def test_elements_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + elem = _make_object_elem("k", "v") + rule = ObjectRule([LBRACE(), elem, RBRACE()]) + self.assertEqual(rule.elements, [elem]) + + def test_elements_multiple(self): + e1 = _make_object_elem("a", 1) + e2 = _make_object_elem("b", 2) + rule = ObjectRule([LBRACE(), e1, e2, RBRACE()]) + 
self.assertEqual(rule.elements, [e1, e2]) + + def test_elements_skips_non_elem(self): + e1 = _make_object_elem("a", 1) + nlc = _make_nlc("\n") + rule = ObjectRule([LBRACE(), nlc, e1, nlc, RBRACE()]) + self.assertEqual(rule.elements, [e1]) + + def test_serialize_default_returns_dict(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + result = rule.serialize() + self.assertEqual(result, {"k1": "v1", "k2": "v2"}) + + def test_serialize_empty_returns_empty_dict(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + self.assertEqual(rule.serialize(), {}) + + def test_serialize_single_element(self): + rule = ObjectRule([LBRACE(), _make_object_elem("x", 42), RBRACE()]) + self.assertEqual(rule.serialize(), {"x": 42}) + + def test_serialize_wrap_objects(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + # Result is "{k1 = v1, k2 = v2}" wrapped in ${}, giving ${{...}} + self.assertEqual(result, "${{k1 = v1, k2 = v2}}") + + def test_serialize_wrap_objects_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${{}}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string forces string representation + self.assertEqual(result, "{k = v}") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("a", 1), + _make_object_elem("b", 2), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + 
self.assertEqual(result, "{a = 1, b = 2}") + + def test_serialize_wrap_objects_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "{k = v}") diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py new file mode 100644 index 00000000..16800ed0 --- /dev/null +++ b/test/unit/rules/test_expressions.py @@ -0,0 +1,489 @@ +from unittest import TestCase + +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ( + ExpressionRule, + ExprTermRule, + ConditionalRule, + BinaryTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.tokens import ( + LPAR, + RPAR, + QMARK, + COLON, + BINARY_OP, + NAME, + StringToken, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value, children=None): + self._stub_value = value + super().__init__(children or [], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +class NonExpressionRule(LarkRule): + """A rule that is NOT an ExpressionRule, for parent-chain tests.""" + + @staticmethod + def lark_name(): + return "non_expression" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "non_expr" + + +def _make_expr_term(value): + """Build ExprTermRule wrapping a StubExpression (no parens).""" + return ExprTermRule([StubExpression(value)]) + + +def _make_paren_expr_term(value): + """Build ExprTermRule wrapping a StubExpression in parentheses.""" + return 
ExprTermRule([LPAR(), StubExpression(value), RPAR()]) + + +def _make_binary_operator(op_str): + """Build BinaryOperatorRule for an operator string.""" + return BinaryOperatorRule([BINARY_OP(op_str)]) + + +def _make_binary_term(op_str, rhs_value): + """Build BinaryTermRule with given operator and RHS value.""" + return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) + + +MINUS_TOKEN = StringToken["MINUS"] +NOT_TOKEN = StringToken["NOT"] + + +# --- ExprTermRule tests --- + + +class TestExprTermRule(TestCase): + def test_lark_name(self): + self.assertEqual(ExprTermRule.lark_name(), "expr_term") + + def test_construction_without_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertFalse(rule.parentheses) + + def test_construction_without_parens_children_structure(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + # children: [None, None, stub, None, None] + self.assertEqual(len(rule.children), 5) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIsNone(rule.children[4]) + + def test_construction_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertTrue(rule.parentheses) + + def test_construction_with_parens_children_structure(self): + stub = StubExpression("a") + lpar = LPAR() + rpar = RPAR() + rule = ExprTermRule([lpar, stub, rpar]) + # children: [LPAR, None, stub, None, RPAR] + self.assertEqual(len(rule.children), 5) + self.assertIs(rule.children[0], lpar) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIs(rule.children[4], rpar) + + def test_expression_property(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertIs(rule.expression, stub) + + def test_expression_property_with_parens(self): + stub = 
StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertIs(rule.expression, stub) + + def test_serialize_no_parens_delegates_to_inner(self): + rule = _make_expr_term("hello") + self.assertEqual(rule.serialize(), "hello") + + def test_serialize_no_parens_passes_through_int(self): + stub = StubExpression(42) + rule = ExprTermRule([stub]) + self.assertEqual(rule.serialize(), 42) + + def test_serialize_with_parens_wraps_and_dollar(self): + rule = _make_paren_expr_term("a") + result = rule.serialize() + self.assertEqual(result, "${(a)}") + + def test_serialize_with_parens_inside_dollar_string(self): + rule = _make_paren_expr_term("a") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string: wraps in () but NOT in ${} + self.assertEqual(result, "(a)") + + def test_serialize_sets_inside_parentheses_context(self): + """When parenthesized, inner expression should see inside_parentheses=True.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([LPAR(), ContextCapture([]), RPAR()]) + rule.serialize() + self.assertTrue(seen_context["inside_parentheses"]) + + def test_serialize_no_parens_preserves_inside_parentheses(self): + """Without parens, inside_parentheses passes through from caller context.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([ContextCapture([])]) + rule.serialize(context=SerializationContext(inside_parentheses=False)) + self.assertFalse(seen_context["inside_parentheses"]) + + +# --- ConditionalRule tests --- + + +class TestConditionalRule(TestCase): + def 
_make_conditional(self, cond_val="cond", true_val="yes", false_val="no"): + return ConditionalRule( + [ + StubExpression(cond_val), + QMARK(), + StubExpression(true_val), + COLON(), + StubExpression(false_val), + ] + ) + + def test_lark_name(self): + self.assertEqual(ConditionalRule.lark_name(), "conditional") + + def test_construction_inserts_optional_slots(self): + rule = self._make_conditional() + # Should have 8 children after _insert_optionals at [2, 4, 6] + self.assertEqual(len(rule.children), 8) + + def test_condition_property(self): + cond = StubExpression("cond") + rule = ConditionalRule( + [cond, QMARK(), StubExpression("t"), COLON(), StubExpression("f")] + ) + self.assertIs(rule.condition, cond) + + def test_if_true_property(self): + true_expr = StubExpression("yes") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + true_expr, + COLON(), + StubExpression("f"), + ] + ) + self.assertIs(rule.if_true, true_expr) + + def test_if_false_property(self): + false_expr = StubExpression("no") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + StubExpression("t"), + COLON(), + false_expr, + ] + ) + self.assertIs(rule.if_false, false_expr) + + def test_serialize_format(self): + rule = self._make_conditional("a", "b", "c") + result = rule.serialize() + self.assertEqual(result, "${a ? b : c}") + + def test_serialize_wraps_in_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + result = rule.serialize() + self.assertTrue(result.startswith("${")) + self.assertTrue(result.endswith("}")) + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "x ? 
y : z") + + def test_serialize_force_parens_no_parent(self): + """force_operation_parentheses with no parent → no wrapping.""" + rule = self._make_conditional("a", "b", "c") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + # No parent, so _wrap_into_parentheses returns unchanged + self.assertEqual(result, "${a ? b : c}") + + def test_serialize_force_parens_with_expression_parent(self): + """force_operation_parentheses with ExpressionRule parent → wraps.""" + rule = self._make_conditional("a", "b", "c") + # Nest inside another expression to set parent + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a ? b : c)}") + + +# --- BinaryTermRule tests --- + + +class TestBinaryTermRule(TestCase): + def test_lark_name(self): + self.assertEqual(BinaryTermRule.lark_name(), "binary_term") + + def test_construction_inserts_optional(self): + rule = _make_binary_term("+", "b") + # [BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 3) + self.assertIsNone(rule.children[1]) + + def test_binary_operator_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.binary_operator, op) + + def test_expr_term_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.expr_term, rhs) + + def test_serialize(self): + rule = _make_binary_term("+", "b") + result = rule.serialize() + self.assertEqual(result, "+ b") + + def test_serialize_equals_operator(self): + rule = _make_binary_term("==", "x") + self.assertEqual(rule.serialize(), "== x") + + def test_serialize_and_operator(self): + rule = _make_binary_term("&&", "y") + self.assertEqual(rule.serialize(), "&& y") + + +# --- BinaryOpRule tests --- + + +class 
TestBinaryOpRule(TestCase): + def _make_binary_op(self, lhs_val, op_str, rhs_val): + lhs = _make_expr_term(lhs_val) + bt = _make_binary_term(op_str, rhs_val) + return BinaryOpRule([lhs, bt, None]) + + def test_lark_name(self): + self.assertEqual(BinaryOpRule.lark_name(), "binary_op") + + def test_expr_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.expr_term, lhs) + + def test_binary_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.binary_term, bt) + + def test_serialize_addition(self): + rule = self._make_binary_op("a", "+", "b") + self.assertEqual(rule.serialize(), "${a + b}") + + def test_serialize_equality(self): + rule = self._make_binary_op("x", "==", "y") + self.assertEqual(rule.serialize(), "${x == y}") + + def test_serialize_and(self): + rule = self._make_binary_op("p", "&&", "q") + self.assertEqual(rule.serialize(), "${p && q}") + + def test_serialize_multiply(self): + rule = self._make_binary_op("a", "*", "b") + self.assertEqual(rule.serialize(), "${a * b}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_binary_op("a", "+", "b") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "a + b") + + def test_serialize_force_parens_no_parent(self): + """No parent → _wrap_into_parentheses returns unchanged.""" + rule = self._make_binary_op("a", "+", "b") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${a + b}") + + def test_serialize_force_parens_with_expression_parent(self): + """With ExpressionRule parent → wraps in parens.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + 
result = rule.serialize(options=opts) + self.assertEqual(result, "${(a + b)}") + + def test_serialize_force_parens_inside_dollar_string_with_parent(self): + """Inside dollar string + parent → parens without extra ${}.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "(a + b)") + + +# --- UnaryOpRule tests --- + + +class TestUnaryOpRule(TestCase): + def _make_unary(self, op_str, operand_val): + token_cls = MINUS_TOKEN if op_str == "-" else NOT_TOKEN + token = token_cls(op_str) + expr_term = _make_expr_term(operand_val) + return UnaryOpRule([token, expr_term]) + + def test_lark_name(self): + self.assertEqual(UnaryOpRule.lark_name(), "unary_op") + + def test_operator_property_minus(self): + rule = self._make_unary("-", "x") + self.assertEqual(rule.operator, "-") + + def test_operator_property_not(self): + rule = self._make_unary("!", "x") + self.assertEqual(rule.operator, "!") + + def test_expr_term_property(self): + expr_term = _make_expr_term("x") + token = MINUS_TOKEN("-") + rule = UnaryOpRule([token, expr_term]) + self.assertIs(rule.expr_term, expr_term) + + def test_serialize_minus(self): + rule = self._make_unary("-", "a") + self.assertEqual(rule.serialize(), "${-a}") + + def test_serialize_not(self): + rule = self._make_unary("!", "flag") + self.assertEqual(rule.serialize(), "${!flag}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_unary("-", "x") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "-x") + + def test_serialize_force_parens_no_parent(self): + rule = self._make_unary("-", "x") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + 
self.assertEqual(result, "${-x}") + + def test_serialize_force_parens_with_expression_parent(self): + rule = self._make_unary("-", "x") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(-x)}") + + +# --- ExpressionRule._wrap_into_parentheses tests --- + + +class TestWrapIntoParenthesesMethod(TestCase): + def test_returns_unchanged_when_inside_parentheses(self): + expr = StubExpression("test") + ctx = SerializationContext(inside_parentheses=True) + result = expr._wrap_into_parentheses("${x}", context=ctx) + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_no_parent(self): + expr = StubExpression("test") + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_parent_not_expression(self): + expr = StubExpression("test") + NonExpressionRule([expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_wraps_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_wraps_plain_string_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("a + b") + self.assertEqual(result, "(a + b)") + + def test_expr_term_parent_with_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is ExpressionRule → wraps.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + # inner is now at expr_term._children[2], parent=expr_term + StubExpression("grandparent", children=[expr_term]) + # expr_term.parent = grandparent (ExpressionRule) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def 
test_expr_term_parent_with_non_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is NOT ExpressionRule → no wrap.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + NonExpressionRule([expr_term]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_expr_term_parent_with_no_grandparent(self): + """Parent is ExprTermRule with no parent → no wrap.""" + inner = StubExpression("test") + ExprTermRule([inner]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py new file mode 100644 index 00000000..f6b8b94c --- /dev/null +++ b/test/unit/rules/test_literal_rules.py @@ -0,0 +1,95 @@ +from unittest import TestCase + +from hcl2.rules.literal_rules import ( + TokenRule, + KeywordRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + BinaryOperatorRule, +) +from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral + + +class TestKeywordRule(TestCase): + def test_lark_name(self): + self.assertEqual(KeywordRule.lark_name(), "keyword") + + def test_token_property(self): + token = NAME("true") + rule = KeywordRule([token]) + self.assertIs(rule.token, token) + + def test_serialize(self): + rule = KeywordRule([NAME("true")]) + self.assertEqual(rule.serialize(), "true") + + +class TestIdentifierRule(TestCase): + def test_lark_name(self): + self.assertEqual(IdentifierRule.lark_name(), "identifier") + + def test_serialize(self): + rule = IdentifierRule([NAME("my_var")]) + self.assertEqual(rule.serialize(), "my_var") + + def test_token_property(self): + token = NAME("foo") + rule = IdentifierRule([token]) + self.assertIs(rule.token, token) + + +class TestIntLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(IntLitRule.lark_name(), "int_lit") + + def test_serialize_returns_int(self): + rule = IntLitRule([IntLiteral("42")]) + result = rule.serialize() + 
self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + +class TestFloatLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(FloatLitRule.lark_name(), "float_lit") + + def test_serialize_returns_float(self): + rule = FloatLitRule([FloatLiteral("3.14")]) + result = rule.serialize() + self.assertAlmostEqual(result, 3.14) + self.assertIsInstance(result, float) + + +class TestBinaryOperatorRule(TestCase): + def test_lark_name(self): + self.assertEqual(BinaryOperatorRule.lark_name(), "binary_operator") + + def test_serialize_plus(self): + rule = BinaryOperatorRule([BINARY_OP("+")]) + self.assertEqual(rule.serialize(), "+") + + def test_serialize_equals(self): + rule = BinaryOperatorRule([BINARY_OP("==")]) + self.assertEqual(rule.serialize(), "==") + + def test_serialize_and(self): + rule = BinaryOperatorRule([BINARY_OP("&&")]) + self.assertEqual(rule.serialize(), "&&") + + def test_serialize_or(self): + rule = BinaryOperatorRule([BINARY_OP("||")]) + self.assertEqual(rule.serialize(), "||") + + def test_serialize_gt(self): + rule = BinaryOperatorRule([BINARY_OP(">")]) + self.assertEqual(rule.serialize(), ">") + + def test_serialize_multiply(self): + rule = BinaryOperatorRule([BINARY_OP("*")]) + self.assertEqual(rule.serialize(), "*") + + def test_token_property(self): + token = BINARY_OP("+") + rule = BinaryOperatorRule([token]) + self.assertIs(rule.token, token) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py new file mode 100644 index 00000000..67fec075 --- /dev/null +++ b/test/unit/rules/test_strings.py @@ -0,0 +1,247 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.strings import ( + InterpolationRule, + StringPartRule, + StringRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, +) +from 
hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +# --- Helpers --- + + +def _make_string_part_chars(text): + return StringPartRule([STRING_CHARS(text)]) + + +def _make_string_part_escaped(text): + return StringPartRule([ESCAPED_INTERPOLATION(text)]) + + +def _make_string_part_interpolation(expr_value): + interp = InterpolationRule([INTERP_START(), StubExpression(expr_value), RBRACE()]) + return StringPartRule([interp]) + + +def _make_string(parts): + """Build StringRule from a list of StringPartRule children.""" + return StringRule([DBLQUOTE(), *parts, DBLQUOTE()]) + + +# --- InterpolationRule tests --- + + +class TestInterpolationRule(TestCase): + def test_lark_name(self): + self.assertEqual(InterpolationRule.lark_name(), "interpolation") + + def test_expression_property(self): + expr = StubExpression("var.name") + rule = InterpolationRule([INTERP_START(), expr, RBRACE()]) + self.assertIs(rule.expression, expr) + + def test_serialize_wraps_in_dollar_string(self): + rule = InterpolationRule([INTERP_START(), StubExpression("var.name"), RBRACE()]) + self.assertEqual(rule.serialize(), "${var.name}") + + def test_serialize_idempotent_if_already_dollar(self): + rule = InterpolationRule([INTERP_START(), StubExpression("${x}"), RBRACE()]) + self.assertEqual(rule.serialize(), "${x}") + + def test_serialize_expression_result(self): + rule = InterpolationRule([INTERP_START(), StubExpression("a + b"), RBRACE()]) + self.assertEqual(rule.serialize(), "${a + b}") + + +# --- StringPartRule tests --- + + +class TestStringPartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringPartRule.lark_name(), "string_part") + + def 
test_content_property_string_chars(self): + token = STRING_CHARS("hello") + rule = StringPartRule([token]) + self.assertIs(rule.content, token) + + def test_serialize_string_chars(self): + rule = _make_string_part_chars("hello world") + self.assertEqual(rule.serialize(), "hello world") + + def test_serialize_escaped_interpolation(self): + rule = _make_string_part_escaped("$${aws:username}") + self.assertEqual(rule.serialize(), "$${aws:username}") + + def test_serialize_interpolation(self): + rule = _make_string_part_interpolation("var.name") + self.assertEqual(rule.serialize(), "${var.name}") + + def test_content_property_interpolation(self): + interp = InterpolationRule([INTERP_START(), StubExpression("x"), RBRACE()]) + rule = StringPartRule([interp]) + self.assertIs(rule.content, interp) + + +# --- StringRule tests --- + + +class TestStringRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringRule.lark_name(), "string") + + def test_string_parts_property(self): + p1 = _make_string_part_chars("hello") + p2 = _make_string_part_chars(" world") + rule = _make_string([p1, p2]) + self.assertEqual(rule.string_parts, [p1, p2]) + + def test_string_parts_empty(self): + rule = _make_string([]) + self.assertEqual(rule.string_parts, []) + + def test_serialize_plain_string(self): + rule = _make_string([_make_string_part_chars("hello")]) + self.assertEqual(rule.serialize(), '"hello"') + + def test_serialize_empty_string(self): + rule = _make_string([]) + self.assertEqual(rule.serialize(), '""') + + def test_serialize_concatenated_parts(self): + rule = _make_string( + [ + _make_string_part_chars("prefix:"), + _make_string_part_interpolation("var.name"), + _make_string_part_chars("-suffix"), + ] + ) + self.assertEqual(rule.serialize(), '"prefix:${var.name}-suffix"') + + def test_serialize_escaped_and_interpolation(self): + rule = _make_string( + [ + _make_string_part_interpolation("bar"), + _make_string_part_escaped("$${baz:bat}"), + ] + ) + 
self.assertEqual(rule.serialize(), '"${bar}$${baz:bat}"') + + def test_serialize_only_interpolation(self): + rule = _make_string([_make_string_part_interpolation("x")]) + self.assertEqual(rule.serialize(), '"${x}"') + + +# --- HeredocTemplateRule tests --- + + +class TestHeredocTemplateRule(TestCase): + def test_lark_name(self): + self.assertEqual(HeredocTemplateRule.lark_name(), "heredoc_template") + + def test_heredoc_property(self): + token = HEREDOC_TEMPLATE("< str: + return "test_inline" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +def _make_nlc(text): + """Helper: build NewLineOrCommentRule from a string.""" + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +# --- Tests --- + + +class TestNewLineOrCommentRule(TestCase): + def test_lark_name(self): + self.assertEqual(NewLineOrCommentRule.lark_name(), "new_line_or_comment") + + def test_serialize_newline(self): + rule = _make_nlc("\n") + self.assertEqual(rule.serialize(), "\n") + + def test_serialize_line_comment(self): + rule = _make_nlc("// this is a comment\n") + self.assertEqual(rule.serialize(), "// this is a comment\n") + + def test_serialize_hash_comment(self): + rule = _make_nlc("# hash comment\n") + self.assertEqual(rule.serialize(), "# hash comment\n") + + def test_to_list_bare_newline_returns_none(self): + rule = _make_nlc("\n") + self.assertIsNone(rule.to_list()) + + def test_to_list_line_comment(self): + rule = _make_nlc("// my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_hash_comment(self): + rule = _make_nlc("# my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_block_comment(self): + rule = _make_nlc("/* block comment */\n") + result = rule.to_list() + self.assertEqual(result, ["block comment"]) + + def test_to_list_multiple_comments(self): + rule = _make_nlc("// first\n// second\n") + result = rule.to_list() + 
self.assertIn("first", result) + self.assertIn("second", result) + + def test_token_property(self): + token = NL_OR_COMMENT("\n") + rule = NewLineOrCommentRule([token]) + self.assertIs(rule.token, token) + + +class TestInlineCommentMixIn(TestCase): + def test_insert_optionals_inserts_none_where_no_comment(self): + from hcl2.rules.tokens import NAME + + token = NAME("x") + children = [token, NAME("y")] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should have inserted None at index 1, pushing NAME("y") to index 2 + self.assertIsNone(children[1]) + self.assertEqual(len(children), 3) + + def test_insert_optionals_leaves_comment_in_place(self): + comment = _make_nlc("// comment\n") + from hcl2.rules.tokens import NAME + + children = [NAME("x"), comment] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should NOT insert None since index 1 is already a NewLineOrCommentRule + self.assertIs(children[1], comment) + self.assertEqual(len(children), 2) + + def test_insert_optionals_handles_index_error(self): + children = [_make_nlc("\n")] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [3]) + # Should insert None at index 3 + self.assertEqual(len(children), 2) + self.assertIsNone(children[1]) + + def test_inline_comments_collects_from_children(self): + comment = _make_nlc("// hello\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), comment]) + result = rule.inline_comments() + self.assertEqual(result, ["hello"]) + + def test_inline_comments_skips_bare_newlines(self): + newline = _make_nlc("\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), newline]) + result = rule.inline_comments() + self.assertEqual(result, []) + + def test_inline_comments_recursive(self): + comment = _make_nlc("// inner\n") + inner = ConcreteInlineComment([comment]) + 
outer = ConcreteInlineComment([inner]) + result = outer.inline_comments() + self.assertEqual(result, ["inner"]) + + def test_inline_comments_empty(self): + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x")]) + result = rule.inline_comments() + self.assertEqual(result, []) diff --git a/test/unit/test_api.py b/test/unit/test_api.py new file mode 100644 index 00000000..a87d9e32 --- /dev/null +++ b/test/unit/test_api.py @@ -0,0 +1,244 @@ +from io import StringIO +from unittest import TestCase + +from hcl2.api import ( + load, + loads, + dump, + dumps, + parse, + parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, + transform, + serialize, +) +from hcl2.rules.base import StartRule +from hcl2.utils import SerializationOptions +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from lark.tree import Tree + + +SIMPLE_HCL = 'x = 5\n' +SIMPLE_DICT = {"x": 5} + +BLOCK_HCL = 'resource "aws_instance" "example" {\n ami = "abc-123"\n}\n' + + +class TestLoads(TestCase): + + def test_simple_attribute(self): + result = loads(SIMPLE_HCL) + self.assertEqual(result["x"], 5) + + def test_returns_dict(self): + result = loads(SIMPLE_HCL) + self.assertIsInstance(result, dict) + + def test_with_serialization_options(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_meta_option(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_meta=True)) + self.assertIn("x", result) + + def test_block_parsing(self): + result = loads(BLOCK_HCL) + self.assertIn("resource", result) + + +class TestLoad(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = load(f) + self.assertEqual(result["x"], 5) + + def test_with_serialization_options(self): + f = StringIO(SIMPLE_HCL) + result = load(f, 
serialization_options=SerializationOptions(with_comments=False)) + self.assertEqual(result["x"], 5) + + +class TestDumps(TestCase): + + def test_simple_attribute(self): + result = dumps(SIMPLE_DICT) + self.assertIsInstance(result, str) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_dumps_contains_key_and_value(self): + result = dumps(SIMPLE_DICT) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_with_deserializer_options(self): + result = dumps(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, str) + + def test_with_formatter_options(self): + result = dumps(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, str) + + +class TestDump(TestCase): + + def test_writes_to_file(self): + f = StringIO() + dump(SIMPLE_DICT, f) + output = f.getvalue() + self.assertIn("x", output) + self.assertIn("5", output) + + +class TestParsesToTree(TestCase): + + def test_returns_lark_tree(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertIsInstance(result, Tree) + + def test_tree_has_start_rule(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertEqual(result.data, "start") + + +class TestParseToTree(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse_to_tree(f) + self.assertIsInstance(result, Tree) + + +class TestParses(TestCase): + + def test_returns_start_rule(self): + result = parses(SIMPLE_HCL) + self.assertIsInstance(result, StartRule) + + def test_discard_comments_false(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=False) + serialized = serialize(result) + self.assertIn("__comments__", serialized) + + def test_discard_comments_true(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestParse(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = 
parse(f) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + f = StringIO('# comment\nx = 5\n') + result = parse(f, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestTransform(TestCase): + + def test_transforms_lark_tree(self): + lark_tree = parses_to_tree(SIMPLE_HCL) + result = transform(lark_tree) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + lark_tree = parses_to_tree('# comment\nx = 5\n') + result = transform(lark_tree, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestSerialize(TestCase): + + def test_returns_dict(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_options(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + + def test_none_options_uses_defaults(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=None) + self.assertEqual(result["x"], 5) + + +class TestFromDict(TestCase): + + def test_returns_start_rule(self): + result = from_dict(SIMPLE_DICT) + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_dict(SIMPLE_DICT) + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_dict(SIMPLE_DICT, format=False) + self.assertIsInstance(result, StartRule) + + def test_with_deserializer_options(self): + result = from_dict(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, StartRule) + + def test_with_formatter_options(self): + result = from_dict(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, StartRule) + + +class TestFromJson(TestCase): + + def 
test_returns_start_rule(self): + result = from_json('{"x": 5}') + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_json('{"x": 5}') + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_json('{"x": 5}', format=False) + self.assertIsInstance(result, StartRule) + + +class TestReconstruct(TestCase): + + def test_from_start_rule(self): + tree = parses(SIMPLE_HCL) + result = reconstruct(tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_from_lark_tree(self): + lark_tree = parses_to_tree(SIMPLE_HCL) + result = reconstruct(lark_tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_roundtrip(self): + tree = parses(SIMPLE_HCL) + hcl_text = reconstruct(tree) + reparsed = loads(hcl_text) + self.assertEqual(reparsed["x"], 5) diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py index 2ce0cfed..5d411c64 100644 --- a/test/unit/test_builder.py +++ b/test/unit/test_builder.py @@ -1,110 +1,157 @@ -# pylint:disable=C0116 - -"""Test building an HCL file from scratch""" - -from pathlib import Path from unittest import TestCase -import hcl2 -import hcl2.builder - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestBuilder(TestCase): - """Test building a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_build_blocks_tf(self): - nested_builder = hcl2.Builder() - nested_builder.block("nested_block_1", ["a"], foo="bar") - nested_builder.block("nested_block_1", ["a", "b"], bar="foo") - nested_builder.block("nested_block_1", foobar="barfoo") - nested_builder.block("nested_block_2", barfoo="foobar") - - builder = hcl2.Builder() - builder.block("block", a=1) - 
builder.block("block", ["label"], __nested_builder__=nested_builder, b=2) - - self.compare_filenames(builder, "blocks.tf") - - def test_build_escapes_tf(self): - builder = hcl2.Builder() - - builder.block("block", ["block_with_newlines"], a="line1\nline2") - - self.compare_filenames(builder, "escapes.tf") - - def test_locals_embdedded_condition_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - terraform={ - "channels": "${(local.running_in_ci ? local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": None, - }, +from hcl2.builder import Builder +from hcl2.const import IS_BLOCK + + +class TestBuilderAttributes(TestCase): + + def test_empty_builder(self): + b = Builder() + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + + def test_with_attributes(self): + b = Builder({"key": "value", "count": 3}) + result = b.build() + self.assertEqual(result["key"], "value") + self.assertEqual(result["count"], 3) + + def test_is_block_marker_present(self): + b = Builder({"x": 1}) + result = b.build() + self.assertTrue(result[IS_BLOCK]) + + +class TestBuilderBlock(TestCase): + + def test_simple_block(self): + b = Builder() + b.block("resource") + result = b.build() + self.assertIn("resource", result) + self.assertEqual(len(result["resource"]), 1) + + def test_block_with_labels(self): + b = Builder() + b.block("resource", labels=["aws_instance", "example"]) + result = b.build() + block_entry = result["resource"][0] + self.assertIn("aws_instance", block_entry) + inner = block_entry["aws_instance"] + self.assertIn("example", inner) + + def test_block_with_attributes(self): + b = Builder() + b.block("resource", labels=["type"], ami="abc-123") + result = b.build() + block = result["resource"][0]["type"] + self.assertEqual(block["ami"], "abc-123") + + def test_multiple_blocks_same_type(self): + b = Builder() + b.block("resource", labels=["type_a"]) + b.block("resource", labels=["type_b"]) + result = 
b.build() + self.assertEqual(len(result["resource"]), 2) + + def test_multiple_block_types(self): + b = Builder() + b.block("resource") + b.block("data") + result = b.build() + self.assertIn("resource", result) + self.assertIn("data", result) + + def test_block_returns_builder(self): + b = Builder() + child = b.block("resource") + self.assertIsInstance(child, Builder) + + def test_block_child_attributes(self): + b = Builder() + child = b.block("resource", labels=["type"]) + child.attributes["nested_key"] = "nested_val" + # Rebuild to pick up the changes + result = b.build() + block = result["resource"][0]["type"] + self.assertEqual(block["nested_key"], "nested_val") + + def test_self_reference_raises(self): + b = Builder() + with self.assertRaises(ValueError): + b.block("resource", __nested_builder__=b) + + +class TestBuilderNestedBlocks(TestCase): + + def test_nested_builder(self): + b = Builder() + inner = Builder() + inner.block("provisioner", labels=["local-exec"], command="echo hello") + b.block("resource", labels=["type"], __nested_builder__=inner) + result = b.build() + block = result["resource"][0]["type"] + self.assertIn("provisioner", block) + + def test_nested_blocks_merged(self): + b = Builder() + inner = Builder() + inner.block("sub_block", x=1) + inner.block("sub_block", x=2) + b.block("resource", __nested_builder__=inner) + result = b.build() + block = result["resource"][0] + self.assertEqual(len(block["sub_block"]), 2) + + +class TestBuilderBlockMarker(TestCase): + + def test_block_marker_is_is_block(self): + """Verify IS_BLOCK marker is used (not __start_line__/__end_line__).""" + b = Builder({"x": 1}) + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + self.assertNotIn("__start_line__", result) + self.assertNotIn("__end_line__", result) + + def test_nested_blocks_skip_is_block_key(self): + """_add_nested_blocks should skip IS_BLOCK when merging.""" + b = Builder() + inner = Builder() + inner.block("sub", 
val=1) + b.block("parent", __nested_builder__=inner) + result = b.build() + parent_block = result["parent"][0] + # sub blocks should be present, but IS_BLOCK from inner should not leak as a list + self.assertIn("sub", parent_block) + # IS_BLOCK should be a bool marker, not a list + self.assertTrue(parent_block[IS_BLOCK]) + + +class TestBuilderIntegration(TestCase): + + def test_full_document(self): + doc = Builder() + doc.block( + "resource", + labels=["aws_instance", "web"], + ami="ami-12345", + instance_type="t2.micro", ) - - self.compare_filenames(builder, "locals_embedded_condition.tf") - - def test_locals_embedded_function_tf(self): - builder = hcl2.Builder() - - function_test = ( - "${var.basename}-${var.forwarder_function_name}_" - '${md5("${var.vpc_id}${data.aws_region.current.name}")}' + doc.block( + "resource", + labels=["aws_s3_bucket", "data"], + bucket="my-bucket", ) - builder.block("locals", function_test=function_test) - - self.compare_filenames(builder, "locals_embedded_function.tf") + result = doc.build() + self.assertEqual(len(result["resource"]), 2) - def test_locals_embedded_interpolation_tf(self): - builder = hcl2.Builder() - - attributes = { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); " - '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', - "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}', - "escaped_interpolation": "prefix:$${aws:username}-suffix", - "simple_and_escaped": '${"bar"}$${baz:bat}', - "simple_and_escaped_reversed": '$${baz:bat}${"bar"}', - "nested_escaped": 'bar-${"$${baz:bat}"}', - } - - builder.block("locals", **attributes) - - self.compare_filenames(builder, "string_interpolations.tf") - - def test_provider_function_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - name2='${provider::test2::test("a")}', - name3='${test("a")}', - ) + web = 
result["resource"][0]["aws_instance"]["web"] + self.assertEqual(web["ami"], "ami-12345") + self.assertEqual(web["instance_type"], "t2.micro") - self.compare_filenames(builder, "provider_function.tf") - - def compare_filenames(self, builder: hcl2.Builder, filename: str): - hcl_dict = builder.build() - hcl_ast = hcl2.reverse_transform(hcl_dict) - hcl_content_built = hcl2.writes(hcl_ast) - - hcl_path = (HCL2_DIR / filename).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - self.assertMultiLineEqual( - hcl_content_built, - hcl_file_content, - f"file {filename} does not match its programmatically built version.", - ) + data = result["resource"][1]["aws_s3_bucket"]["data"] + self.assertEqual(data["bucket"], "my-bucket") diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py deleted file mode 100644 index 122332eb..00000000 --- a/test/unit/test_dict_transformer.py +++ /dev/null @@ -1,32 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -from unittest import TestCase - -from hcl2.transformer import DictTransformer - - -class TestDictTransformer(TestCase): - """Test behaviour of hcl2.transformer.DictTransformer class""" - - @staticmethod - def build_dict_transformer(with_meta: bool = False) -> DictTransformer: - return DictTransformer(with_meta) - - def test_to_string_dollar(self): - string_values = { - '"bool"': "bool", - '"number"': "number", - '"string"': "string", - "${value_1}": "${value_1}", - '"value_2': '${"value_2}', - 'value_3"': '${value_3"}', - '"value_4"': "value_4", - "value_5": "${value_5}", - } - - dict_transformer = self.build_dict_transformer() - - for value, expected in string_values.items(): - actual = dict_transformer.to_string_dollar(value) - - self.assertEqual(actual, expected) diff --git a/test/unit/test_hcl2_syntax.py b/test/unit/test_hcl2_syntax.py deleted file mode 100644 index 96113df3..00000000 --- a/test/unit/test_hcl2_syntax.py +++ /dev/null @@ -1,193 +0,0 @@ -# 
pylint:disable=C0114,C0116,C0103,W0612 - -import string # pylint:disable=W4901 # https://stackoverflow.com/a/16651393 -from unittest import TestCase - -from test.helpers.hcl2_helper import Hcl2Helper - -from lark import UnexpectedToken, UnexpectedCharacters - - -class TestHcl2Syntax(Hcl2Helper, TestCase): - """Test parsing individual elements of HCL2 syntax""" - - def test_argument(self): - syntax = self.build_argument("identifier", '"expression"') - result = self.load_to_dict(syntax) - self.assertDictEqual(result, {"identifier": "expression"}) - - def test_identifier_starts_with_digit(self): - for i in range(0, 10): - argument = self.build_argument(f"{i}id") - with self.assertRaises(UnexpectedToken) as e: - self.load_to_dict(argument) - assert ( - f"Unexpected token Token('DECIMAL', '{i}') at line 1, column 1" - in str(e) - ) - - def test_identifier_starts_with_special_chars(self): - chars = string.punctuation.replace("_", "") - for i in chars: - argument = self.build_argument(f"{i}id") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier_contains_special_chars(self): - chars = string.punctuation.replace("_", "").replace("-", "") - for i in chars: - argument = self.build_argument(f"identifier{i}") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier(self): - argument = self.build_argument("_-__identifier_-1234567890-_") - self.load_to_dict(argument) - - def test_block_no_labels(self): - block = """ - block { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{}]}) - - def test_block_single_label(self): - block = """ - block "label" { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{"label": {}}]}) - - def test_block_multiple_labels(self): - block = """ - block "label1" "label2" "label3" { - } - """ - result = self.load_to_dict(block) - 
self.assertDictEqual( - result, {"block": [{"label1": {"label2": {"label3": {}}}}]} - ) - - def test_unary_operation(self): - operations = [ - ("identifier = -10", {"identifier": -10}), - ("identifier = !true", {"identifier": "${!true}"}), - ] - for hcl, dict_ in operations: - result = self.load_to_dict(hcl) - self.assertDictEqual(result, dict_) - - def test_tuple(self): - tuple_ = """tuple = [ - identifier, - "string", 100, - true == false, - 5 + 5, function(), - ]""" - result = self.load_to_dict(tuple_) - self.assertDictEqual( - result, - { - "tuple": [ - "${identifier}", - "string", - 100, - "${true == false}", - "${5 + 5}", - "${function()}", - ] - }, - ) - - def test_object(self): - object_ = """object = { - key1: identifier, key2: "string", key3: 100, - key4: true == false // comment - key5: 5 + 5, key6: function(), - key7: value == null ? 1 : 0 - }""" - result = self.load_to_dict(object_) - self.assertDictEqual( - result, - { - "object": { - "key1": "${identifier}", - "key2": "string", - "key3": 100, - "key4": "${true == false}", - "key5": "${5 + 5}", - "key6": "${function()}", - "key7": "${value == null ? 
1 : 0}", - } - }, - ) - - def test_function_call_and_arguments(self): - calls = { - "r = function()": {"r": "${function()}"}, - "r = function(arg1, arg2)": {"r": "${function(arg1, arg2)}"}, - """r = function( - arg1, arg2, - arg3, - ) - """: { - "r": "${function(arg1, arg2, arg3)}" - }, - } - - for call, expected in calls.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_index(self): - indexes = { - "r = identifier[10]": {"r": "${identifier[10]}"}, - "r = identifier.20": { - "r": "${identifier[2]}" - }, # TODO debug why `20` is parsed to `2` - """r = identifier["key"]""": {"r": '${identifier["key"]}'}, - """r = identifier.key""": {"r": "${identifier.key}"}, - } - for call, expected in indexes.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_e_notation(self): - literals = { - "var = 3e4": {"var": "${3e4}"}, - "var = 3.5e5": {"var": "${3.5e5}"}, - "var = -3e6": {"var": "${-3e6}"}, - "var = -2.3e4": {"var": "${-2.3e4}"}, - "var = -5e-2": {"var": "${-5e-2}"}, - "var = -6.1e-3": {"var": "${-6.1e-3}"}, - } - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) - - def test_null(self): - identifier = "var = null" - - expected = {"var": None} - - result = self.load_to_dict(identifier) - self.assertDictEqual(result, expected) - - def test_expr_term_parenthesis(self): - literals = { - "a = 1 * 2 + 3": {"a": "${1 * 2 + 3}"}, - "b = 1 * (2 + 3)": {"b": "${1 * (2 + 3)}"}, - "c = (1 * (2 + 3))": {"c": "${(1 * (2 + 3))}"}, - "conditional = value == null ? 1 : 0": { - "conditional": "${value == null ? 1 : 0}" - }, - "conditional = (value == null ? 1 : 0)": { - "conditional": "${(value == null ? 
1 : 0)}" - }, - } - - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) diff --git a/test/unit/test_load.py b/test/unit/test_load.py deleted file mode 100644 index f9be8845..00000000 --- a/test/unit/test_load.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Test parsing a variety of hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -from hcl2.parser import PARSER_FILE, parser -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestLoad(TestCase): - """Test parsing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_load_terraform(self): - """Test parsing a set of hcl2 files and force recreating the parser file""" - - # create a parser to make sure that the parser file is created - parser() - - # delete the parser file to force it to be recreated - PARSER_FILE.unlink() - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_load_terraform_from_cache(self): - """Test parsing a set of hcl2 files from a cached parser file""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """Loads a single hcl2 file, parses it and compares with the expected json""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - if not json_path.exists(): - assert ( - False - ), f"Expected json equivalent of the hcl file doesn't exist {json_path}" - - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict = hcl2.load(hcl_file) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - 
json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, json_dict, f"\n\nfailed comparing {hcl_path_str}" - ) diff --git a/test/unit/test_load_with_meta.py b/test/unit/test_load_with_meta.py deleted file mode 100644 index b081844e..00000000 --- a/test/unit/test_load_with_meta.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test parsing hcl files with meta parameters""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - -TEST_WITH_META_DIR = Path(__file__).absolute().parent.parent / "helpers" / "with-meta" -TF_FILE_PATH = TEST_WITH_META_DIR / "data_sources.tf" -JSON_FILE_PATH = TEST_WITH_META_DIR / "data_sources.json" - - -class TestLoadWithMeta(TestCase): - """Test parsing hcl files with meta parameters""" - - def test_load_terraform_meta(self): - """Test load() with with_meta flag set to true.""" - with TF_FILE_PATH.open("r") as tf_file, JSON_FILE_PATH.open("r") as json_file: - self.assertDictEqual( - json.load(json_file), - hcl2.load(tf_file, with_meta=True), - ) diff --git a/test/unit/test_reconstruct_ast.py b/test/unit/test_reconstruct_ast.py deleted file mode 100644 index b9545def..00000000 --- a/test/unit/test_reconstruct_ast.py +++ /dev/null @@ -1,112 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_write_terraform_exact(self): - """ 
- Test reconstructing a set of hcl2 files, to make sure they - reconstruct exactly the same, including whitespace. - """ - - # the reconstruction process is not precise, so some files do not - # reconstruct their whitespace exactly the same, but they are - # syntactically equivalent. This list is a target for further - # improvements to the whitespace handling of the reconstruction - # algorithm. - inexact_files = [ - # the reconstructor loses commas on the last element in an array, - # even if they're in the input file - "iam.tf", - "variables.tf", - # the reconstructor doesn't preserve indentation within comments - # perfectly - "multiline_expressions.tf", - # the reconstructor doesn't preserve the line that a ternary is - # broken on. - "route_table.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_whitespace, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - try: - hcl2_dict = hcl2.loads(hcl_reconstructed) - except Exception as exc: - assert ( - False - ), f"failed to tokenize terraform in file reconstructed from `{hcl_path_str}`: {exc}" - - json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, - json_dict, - f"failed comparing {hcl_path_str} with reconstructed version", - ) - - def check_whitespace(self, 
hcl_path_str: str): - """Tests that the reconstructed file matches the original file exactly.""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - self.assertMultiLineEqual( - hcl_reconstructed, - hcl_file_content, - f"file {hcl_path_str} does not match its reconstructed version \ - exactly. this is usually whitespace related.", - ) diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py deleted file mode 100644 index a65e8429..00000000 --- a/test/unit/test_reconstruct_dict.py +++ /dev/null @@ -1,88 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -import traceback -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - - # the reconstruction process is not precise, so some files do not - # reconstruct any embedded HCL expressions exactly the same. 
this - # list captures those, and should be manually inspected regularly to - # ensure that files remain syntactically equivalent - inexact_files = [ - # one level of interpolation is stripped from this file during - # reconstruction, since we don't have a way to distinguish it from - # a complex HCL expression. the output parses to the same value - # though - "multi_level_interpolation.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict_correct = hcl2.load(hcl_file) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'correct' terraform in " - f"`{hcl_path_str}`: {traceback.format_exc()}" - ) from exc - - json_dict = json.load(json_file) - - try: - hcl_ast = hcl2.reverse_transform(json_dict) - except Exception as exc: - raise RuntimeError( - f"failed to reverse transform HCL from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - raise RuntimeError( - f"failed to reconstruct terraform from AST from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'reconstructed' terraform from AST from " - f"`{json_path.name}`: {exc}, \n{hcl_reconstructed}" - ) from exc - - self.assertDictEqual( - hcl2_dict_reconstructed, - hcl2_dict_correct, - f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}", - ) diff --git 
a/test/unit/test_utils.py b/test/unit/test_utils.py new file mode 100644 index 00000000..f5f94e8c --- /dev/null +++ b/test/unit/test_utils.py @@ -0,0 +1,148 @@ +from unittest import TestCase + +from hcl2.utils import ( + SerializationOptions, + SerializationContext, + is_dollar_string, + to_dollar_string, + unwrap_dollar_string, + wrap_into_parentheses, +) + + +class TestSerializationOptions(TestCase): + def test_default_values(self): + opts = SerializationOptions() + self.assertTrue(opts.with_comments) + self.assertFalse(opts.with_meta) + self.assertFalse(opts.wrap_objects) + self.assertFalse(opts.wrap_tuples) + self.assertTrue(opts.explicit_blocks) + self.assertTrue(opts.preserve_heredocs) + self.assertFalse(opts.force_operation_parentheses) + + def test_custom_values(self): + opts = SerializationOptions( + with_comments=False, + with_meta=True, + force_operation_parentheses=True, + ) + self.assertFalse(opts.with_comments) + self.assertTrue(opts.with_meta) + self.assertTrue(opts.force_operation_parentheses) + + +class TestSerializationContext(TestCase): + def test_default_values(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_replace_returns_new_instance(self): + ctx = SerializationContext() + new_ctx = ctx.replace(inside_dollar_string=True) + self.assertIsNot(ctx, new_ctx) + self.assertFalse(ctx.inside_dollar_string) + self.assertTrue(new_ctx.inside_dollar_string) + + def test_modify_mutates_and_restores(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + + with ctx.modify(inside_dollar_string=True): + self.assertTrue(ctx.inside_dollar_string) + + self.assertFalse(ctx.inside_dollar_string) + + def test_modify_restores_on_exception(self): + ctx = SerializationContext() + + with self.assertRaises(ValueError): + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + 
self.assertTrue(ctx.inside_parentheses) + raise ValueError("test") + + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_modify_multiple_fields(self): + ctx = SerializationContext() + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_copy_yields_independent_copy(self): + ctx = SerializationContext() + with ctx.copy(inside_dollar_string=True) as copied: + self.assertTrue(copied.inside_dollar_string) + self.assertFalse(ctx.inside_dollar_string) + self.assertIsNot(ctx, copied) + + +class TestIsDollarString(TestCase): + def test_valid_dollar_string(self): + self.assertTrue(is_dollar_string("${x}")) + + def test_nested_dollar_string(self): + self.assertTrue(is_dollar_string("${a + b}")) + + def test_plain_string(self): + self.assertFalse(is_dollar_string("foo")) + + def test_incomplete_prefix(self): + self.assertFalse(is_dollar_string("${")) + + def test_non_string_input(self): + self.assertFalse(is_dollar_string(42)) + self.assertFalse(is_dollar_string(None)) + + def test_empty_dollar_string(self): + self.assertTrue(is_dollar_string("${}")) + + def test_dollar_without_brace(self): + self.assertFalse(is_dollar_string("$x}")) + + def test_missing_closing_brace(self): + self.assertFalse(is_dollar_string("${x")) + + +class TestToDollarString(TestCase): + def test_wraps_plain_string(self): + self.assertEqual(to_dollar_string("x"), "${x}") + + def test_idempotent_on_dollar_string(self): + self.assertEqual(to_dollar_string("${x}"), "${x}") + + def test_wraps_empty(self): + self.assertEqual(to_dollar_string(""), "${}") + + def test_wraps_expression(self): + self.assertEqual(to_dollar_string("a + b"), "${a + b}") + + +class TestUnwrapDollarString(TestCase): + def test_strips_wrapping(self): + 
self.assertEqual(unwrap_dollar_string("${x}"), "x") + + def test_noop_on_plain_string(self): + self.assertEqual(unwrap_dollar_string("foo"), "foo") + + def test_strips_complex_expression(self): + self.assertEqual(unwrap_dollar_string("${a + b}"), "a + b") + + +class TestWrapIntoParentheses(TestCase): + def test_plain_string(self): + self.assertEqual(wrap_into_parentheses("x"), "(x)") + + def test_dollar_string(self): + self.assertEqual(wrap_into_parentheses("${x}"), "${(x)}") + + def test_expression_string(self): + self.assertEqual(wrap_into_parentheses("a + b"), "(a + b)") + + def test_dollar_expression(self): + self.assertEqual(wrap_into_parentheses("${a + b}"), "${(a + b)}")