diff --git a/config.yml b/config.yml index 5e29d6fa18..d696a07f30 100644 --- a/config.yml +++ b/config.yml @@ -811,8 +811,6 @@ nodes: - GlobalVariableReadNode - BackReferenceReadNode - NumberedReferenceReadNode - - on error: SymbolNode # alias $a b - - on error: MissingNode # alias $a 42 comment: | Represents the old name of the global variable that can be used before aliasing. @@ -853,8 +851,6 @@ nodes: kind: - SymbolNode - InterpolatedSymbolNode - - on error: GlobalVariableReadNode # alias a $b - - on error: MissingNode # alias a 42 comment: | Represents the old name of the method that will be aliased. @@ -1867,7 +1863,6 @@ nodes: kind: - ConstantReadNode - ConstantPathNode - - on error: CallNode # class 0.X end - name: inheritance_operator_loc type: location? comment: | @@ -2422,6 +2417,13 @@ nodes: ^^^^^^ bar end + - name: ErrorRecoveryNode + fields: + - name: child + type: node? + kind: Node + comment: | + Represents a node that is either missing or unexpected and results in a syntax error. - name: FalseNode comment: | Represents the use of the literal `false` keyword. @@ -2464,9 +2466,7 @@ nodes: ^^^^^^ - name: right type: node - kind: - - SplatNode - - on error: MissingNode + kind: SplatNode comment: | Represents the second wildcard node in the pattern. @@ -2549,9 +2549,6 @@ nodes: - CallTargetNode - IndexTargetNode - MultiTargetNode - - on error: BackReferenceReadNode # for $& in a end - - on error: NumberedReferenceReadNode # for $1 in a end - - on error: MissingNode # for in 1..10; end comment: | The index expression for `for` loops. @@ -3305,10 +3302,6 @@ nodes: - EmbeddedStatementsNode - EmbeddedVariableNode - InterpolatedStringNode # `"a" "#{b}"` - - on error: XStringNode # `<<`FOO` "bar" - - on error: InterpolatedXStringNode - - on error: SymbolNode - - on error: InterpolatedSymbolNode - name: closing_loc type: location? 
newline: parts @@ -3686,9 +3679,6 @@ nodes: /(?<foo>bar)/ =~ baz ^^^^^^^^^^^^^^^^^^^^ - - name: MissingNode - comment: | - Represents a node that is missing from the source and results in a syntax error. - name: ModuleNode fields: - name: locals @@ -3700,7 +3690,6 @@ nodes: kind: - ConstantReadNode - ConstantPathNode - - on error: MissingNode # module Parent module end - name: body type: node? kind: @@ -3730,8 +3719,6 @@ nodes: - IndexTargetNode - MultiTargetNode - RequiredParameterNode # def m((a,b)); end - - on error: BackReferenceReadNode # a, (b, $&) = z - - on error: NumberedReferenceReadNode # a, (b, $1) = z comment: | Represents the targets expressions before a splat node. @@ -3775,8 +3762,6 @@ nodes: - IndexTargetNode - MultiTargetNode - RequiredParameterNode # def m((*,b)); end - - on error: BackReferenceReadNode # a, (*, $&) = z - - on error: NumberedReferenceReadNode # a, (*, $1) = z comment: | Represents the targets expressions after a splat node. @@ -3820,8 +3805,6 @@ nodes: - CallTargetNode - IndexTargetNode - MultiTargetNode - - on error: BackReferenceReadNode # $&, = z - - on error: NumberedReferenceReadNode # $1, = z comment: | Represents the targets expressions before a splat node. @@ -3864,8 +3847,6 @@ nodes: - CallTargetNode - IndexTargetNode - MultiTargetNode - - on error: BackReferenceReadNode # *, $& = z - - on error: NumberedReferenceReadNode # *, $1 = z comment: | Represents the targets expressions after a splat node. 
@@ -4051,11 +4032,6 @@ nodes: kind: - RequiredParameterNode - MultiTargetNode - # On parsing error of `f(**kwargs, ...)` or `f(**nil, ...)`, the keyword_rest value is moved here: - - on error: KeywordRestParameterNode - - on error: NoKeywordsParameterNode - # On parsing error of `f(..., ...)`, the first forwarding parameter is moved here: - - on error: ForwardingParameterNode - name: keywords type: node[] kind: @@ -4140,7 +4116,6 @@ nodes: - BackReferenceReadNode # foo in ^$& - NumberedReferenceReadNode # foo in ^$1 - ItLocalVariableReadNode # proc { 1 in ^it } - - on error: MissingNode # foo in ^Bar comment: | The variable used in the pinned expression @@ -4223,7 +4198,7 @@ nodes: 1...foo ^^^ - If neither right-hand or left-hand side was included, this will be a MissingNode. + If neither right-hand nor left-hand side was included, this will be an ErrorRecoveryNode. - name: operator_loc type: location comment: | @@ -4338,9 +4313,6 @@ nodes: - ConstantPathTargetNode - CallTargetNode - IndexTargetNode - - on error: BackReferenceReadNode # => begin; rescue => $&; end - - on error: NumberedReferenceReadNode # => begin; rescue => $1; end - - on error: MissingNode # begin; rescue =>; end - name: then_keyword_loc type: location? 
- name: statements diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb index 469e54ca0c..b7f4bc0def 100644 --- a/lib/prism/node_ext.rb +++ b/lib/prism/node_ext.rb @@ -208,7 +208,7 @@ def child if name ConstantReadNode.new(source, -1, name_loc, 0, name) else - MissingNode.new(source, -1, location, 0) + ErrorRecoveryNode.new(source, -1, location, 0, nil) end end end @@ -249,7 +249,7 @@ def child if name ConstantReadNode.new(source, -1, name_loc, 0, name) else - MissingNode.new(source, -1, location, 0) + ErrorRecoveryNode.new(source, -1, location, 0, nil) end end end diff --git a/rakelib/lint.rake b/rakelib/lint.rake index e5ecdb28bc..9df839842a 100644 --- a/rakelib/lint.rake +++ b/rakelib/lint.rake @@ -29,7 +29,7 @@ task :lint do exit(1) end - if (uncommented = nodes.select { |node| !%w[MissingNode ProgramNode].include?(node.fetch("name")) && !node.fetch("comment").match?(/^\s{4}/) }).any? + if (uncommented = nodes.select { |node| !%w[ErrorRecoveryNode ProgramNode].include?(node.fetch("name")) && !node.fetch("comment").match?(/^\s{4}/) }).any? 
names = uncommented.map { |node| node.fetch("name") } warn("Expected all nodes to have an example, missing comments for #{names.join(", ")}") exit(1) diff --git a/rust/ruby-prism/build.rs b/rust/ruby-prism/build.rs index 05cbee87d0..5e153c6d62 100644 --- a/rust/ruby-prism/build.rs +++ b/rust/ruby-prism/build.rs @@ -47,27 +47,12 @@ enum NodeFieldType { Double, } -#[derive(Debug, Deserialize)] -#[allow(dead_code)] -struct OnErrorType { - #[serde(rename = "on error")] - kind: String, -} - -#[derive(Debug, Deserialize)] -#[serde(untagged)] -#[allow(dead_code)] -enum UnionKind { - OnSuccess(String), - OnError(OnErrorType), -} - #[derive(Debug, Deserialize)] #[serde(untagged)] #[allow(dead_code)] enum NodeFieldKind { Concrete(String), - Union(Vec<UnionKind>), + Union(Vec<String>), } #[derive(Debug, Deserialize)] diff --git a/sig/prism/node_ext.rbs b/sig/prism/node_ext.rbs index a187c1d246..04d539e22b 100644 --- a/sig/prism/node_ext.rbs +++ b/sig/prism/node_ext.rbs @@ -60,18 +60,18 @@ module Prism class DynamicPartsInConstantPathError < StandardError end - class MissingNodesInConstantPathError < StandardError + class ErrorRecoveryNodesInConstantPathError < StandardError end def full_name_parts: () -> Array[Symbol] def full_name: () -> String - def child: () -> (ConstantReadNode | MissingNode) + def child: () -> (ConstantReadNode | ErrorRecoveryNode) end class ConstantPathTargetNode < Node def full_name_parts: () -> Array[Symbol] def full_name: () -> String - def child: () -> (ConstantReadNode | MissingNode) + def child: () -> (ConstantReadNode | ErrorRecoveryNode) end class ConstantTargetNode < Node diff --git a/src/prism.c b/src/prism.c index 1a8cdf7568..51a0048684 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1958,19 +1958,54 @@ pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) { #define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_)) /** - * Allocate a new MissingNode 
node. + * Allocate a new ErrorRecoveryNode node to represent a missing node. */ -static pm_missing_node_t * +static pm_error_recovery_node_t * pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t); + pm_error_recovery_node_t *node = PM_NODE_ALLOC(parser, pm_error_recovery_node_t); - *node = (pm_missing_node_t) { - .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end) + *node = (pm_error_recovery_node_t) { + .base = PM_NODE_INIT(parser, PM_ERROR_RECOVERY_NODE, 0, start, end), + .child = NULL }; return node; } +/** + * Allocate a new ErrorRecoveryNode node that wraps an unexpected child node. + */ +static pm_error_recovery_node_t * +pm_unexpected_node_create(pm_parser_t *parser, pm_node_t *child) { + pm_error_recovery_node_t *node = PM_NODE_ALLOC(parser, pm_error_recovery_node_t); + + *node = (pm_error_recovery_node_t) { + .base = PM_NODE_INIT(parser, PM_ERROR_RECOVERY_NODE, 0, child->location.start, child->location.end), + .child = child + }; + + return node; +} + +/** + * Validate a required node's type. If NULL, creates an ErrorRecoveryNode with + * no child. If present but not one of the expected types, wraps it in an + * ErrorRecoveryNode. + */ +#define PM_VALIDATE_NODE_TYPE(parser, node, ...) \ + do { \ + if ((node) == NULL) { \ + (node) = UP(pm_missing_node_create((parser), (parser)->previous.start, (parser)->previous.end)); \ + } else if (!PM_NODE_TYPE_P((node), PM_ERROR_RECOVERY_NODE)) { \ + pm_node_type_t _expected[] = {__VA_ARGS__}; \ + bool _found = false; \ + for (size_t _i = 0; _i < sizeof(_expected) / sizeof(pm_node_type_t); _i++) { \ + if (PM_NODE_TYPE_P((node), _expected[_i])) { _found = true; break; } \ + } \ + if (!_found) (node) = UP(pm_unexpected_node_create((parser), (node))); \ + } \ + } while (0) + /** * Allocate and initialize a new AliasGlobalVariableNode node. 
*/ @@ -3779,21 +3814,17 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left; pm_node_t *right; + pm_splat_node_t *right_splat_node; if (nodes->size == 1) { - right = UP(pm_missing_node_create(parser, left->location.end, left->location.end)); + right = left; + right_splat_node = left_splat_node; } else { right = nodes->nodes[nodes->size - 1]; assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE)); + right_splat_node = (pm_splat_node_t *) right; } -#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS - // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode. - // The resulting AST will anyway be ignored, but this file still needs to compile. - pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node; -#else - pm_node_t *right_splat_node = right; -#endif *node = (pm_find_pattern_node_t) { .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right), .constant = NULL, @@ -4716,7 +4747,7 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL); break; default: - assert(false && "unexpected node type"); + pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL); break; } @@ -4853,16 +4884,8 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ // should clear the mutability flags. CLEAR_FLAGS(node); break; - case PM_X_STRING_NODE: - case PM_INTERPOLATED_X_STRING_NODE: - case PM_SYMBOL_NODE: - case PM_INTERPOLATED_SYMBOL_NODE: - // These will only happen in error cases. But we want to handle it - // here so that we don't fail the assertion. 
- CLEAR_FLAGS(node); - break; default: - assert(false && "unexpected node type"); + CLEAR_FLAGS(node); break; } @@ -12677,7 +12700,7 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) { static pm_node_t * parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) { switch (PM_NODE_TYPE(target)) { - case PM_MISSING_NODE: + case PM_ERROR_RECOVERY_NODE: return target; case PM_SOURCE_ENCODING_NODE: case PM_FALSE_NODE: @@ -12867,7 +12890,7 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) { static pm_node_t * parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) { switch (PM_NODE_TYPE(target)) { - case PM_MISSING_NODE: + case PM_ERROR_RECOVERY_NODE: pm_node_destroy(parser, value); return target; case PM_CLASS_VARIABLE_READ_NODE: { @@ -13098,7 +13121,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE); pm_multi_target_node_t *result = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false)); + + pm_node_t *first = parse_target(parser, first_target, true, false); + PM_VALIDATE_NODE_TYPE(parser, first, + PM_SPLAT_NODE, PM_IMPLICIT_REST_NODE, PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, + PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, PM_CONSTANT_TARGET_NODE, + PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, PM_MULTI_TARGET_NODE, PM_REQUIRED_PARAMETER_NODE); + pm_multi_target_node_targets_append(parser, result, first); while (accept1(parser, PM_TOKEN_COMMA)) { if (accept1(parser, PM_TOKEN_USTAR)) { @@ -13125,12 +13154,20 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 
1)); target = parse_target(parser, target, true, false); + PM_VALIDATE_NODE_TYPE(parser, target, + PM_SPLAT_NODE, PM_IMPLICIT_REST_NODE, PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, + PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, PM_CONSTANT_TARGET_NODE, + PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, PM_MULTI_TARGET_NODE, PM_REQUIRED_PARAMETER_NODE); pm_multi_target_node_targets_append(parser, result, target); context_pop(parser); } else if (token_begins_expression_p(parser->current.type)) { pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); target = parse_target(parser, target, true, false); + PM_VALIDATE_NODE_TYPE(parser, target, + PM_SPLAT_NODE, PM_IMPLICIT_REST_NODE, PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, + PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, PM_CONSTANT_TARGET_NODE, + PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, PM_MULTI_TARGET_NODE, PM_REQUIRED_PARAMETER_NODE); pm_multi_target_node_targets_append(parser, result, target); } else if (!match1(parser, PM_TOKEN_EOF)) { // If we get here, then we have a trailing , in a multi target node. @@ -13220,7 +13257,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // we were unable to parse an expression, then we will skip past this // token and continue parsing the statements list. Otherwise we'll add // an error and continue parsing the statements list. - if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) { + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) { parser_lex(parser); // If we are at the end of the file, then we need to stop parsing @@ -13661,7 +13698,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for parsed_first_argument = true; // If parsing the argument failed, we need to stop parsing arguments. 
- if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break; + if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break; // If the terminator of these arguments is not EOF, then we have a // specific token we're looking for. In that case we can accept a @@ -13903,7 +13940,9 @@ parse_parameters( pm_parameters_node_block_set(params, param); } else { pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI); - pm_parameters_node_posts_append(params, UP(param)); + pm_node_t *post_param = UP(param); + PM_VALIDATE_NODE_TYPE(parser, post_param, PM_REQUIRED_PARAMETER_NODE, PM_MULTI_TARGET_NODE); + pm_parameters_node_posts_append(params, post_param); } break; @@ -13923,6 +13962,7 @@ parse_parameters( // If we already have a keyword rest parameter, then we replace it with the // forwarding parameter and move the keyword rest parameter to the posts list. pm_node_t *keyword_rest = params->keyword_rest; + PM_VALIDATE_NODE_TYPE(parser, keyword_rest, PM_REQUIRED_PARAMETER_NODE, PM_MULTI_TARGET_NODE); pm_parameters_node_posts_append(params, keyword_rest); if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD); params->keyword_rest = NULL; @@ -14137,6 +14177,7 @@ parse_parameters( pm_parameters_node_rest_set(params, param); } else { pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI); + PM_VALIDATE_NODE_TYPE(parser, param, PM_REQUIRED_PARAMETER_NODE, PM_MULTI_TARGET_NODE); pm_parameters_node_posts_append(params, param); } @@ -14180,6 +14221,7 @@ parse_parameters( pm_parameters_node_keyword_rest_set(params, param); } else { pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI); + PM_VALIDATE_NODE_TYPE(parser, param, PM_REQUIRED_PARAMETER_NODE, PM_MULTI_TARGET_NODE); pm_parameters_node_posts_append(params, param); } @@ -14195,8 +14237,9 @@ parse_parameters( if (params->rest == NULL) { pm_parameters_node_rest_set(params, param); } else { - pm_parser_err_node(parser, UP(param), 
PM_ERR_PARAMETER_SPLAT_MULTI); - pm_parameters_node_posts_append(params, UP(param)); + pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI); + PM_VALIDATE_NODE_TYPE(parser, param, PM_REQUIRED_PARAMETER_NODE, PM_MULTI_TARGET_NODE); + pm_parameters_node_posts_append(params, param); } } else { pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); @@ -14376,6 +14419,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); + PM_VALIDATE_NODE_TYPE(parser, reference, + PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, + PM_CONSTANT_TARGET_NODE, PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE); pm_rescue_node_reference_set(rescue, reference); break; } @@ -14406,6 +14452,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); + PM_VALIDATE_NODE_TYPE(parser, reference, + PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, + PM_CONSTANT_TARGET_NODE, PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE); pm_rescue_node_reference_set(rescue, reference); break; } @@ -16137,14 +16186,16 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // Otherwise we need to check the type of the node we just parsed. // If it cannot be concatenated with the previous node, then we'll // need to add a syntax error. 
- if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) { + PM_VALIDATE_NODE_TYPE(parser, node, PM_STRING_NODE, PM_INTERPOLATED_STRING_NODE); + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) { pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION); } // If we haven't already created our container for concatenation, // we'll do that now. if (!concating) { - if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + PM_VALIDATE_NODE_TYPE(parser, current, PM_STRING_NODE, PM_INTERPOLATED_STRING_NODE); + if (PM_NODE_TYPE_P(current, PM_ERROR_RECOVERY_NODE)) { pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION); } @@ -16729,7 +16780,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // Call nodes (arithmetic operations) are not allowed in patterns if (PM_NODE_TYPE(node) == PM_CALL_NODE) { pm_parser_err_node(parser, node, diag_id); - pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end); + pm_error_recovery_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end); pm_node_unreference(parser, node); pm_node_destroy(parser, node); @@ -16822,6 +16873,9 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // not understood. We'll create a missing node and return that. 
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN); pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end)); + PM_VALIDATE_NODE_TYPE(parser, variable, + PM_LOCAL_VARIABLE_READ_NODE, PM_INSTANCE_VARIABLE_READ_NODE, PM_CLASS_VARIABLE_READ_NODE, PM_GLOBAL_VARIABLE_READ_NODE, + PM_BACK_REFERENCE_READ_NODE, PM_NUMBERED_REFERENCE_READ_NODE, PM_IT_LOCAL_VARIABLE_READ_NODE); return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } } @@ -17521,7 +17575,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } pm_array_node_elements_append(array, element); - if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break; + if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; } accept1(parser, PM_TOKEN_NEWLINE); @@ -17618,6 +17672,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b multi_target = (pm_multi_target_node_t *) statement; } else { multi_target = pm_multi_target_node_create(parser); + PM_VALIDATE_NODE_TYPE(parser, statement, + PM_SPLAT_NODE, PM_IMPLICIT_REST_NODE, PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, + PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, PM_CONSTANT_TARGET_NODE, + PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, PM_MULTI_TARGET_NODE, PM_REQUIRED_PARAMETER_NODE); pm_multi_target_node_targets_append(parser, multi_target, statement); } @@ -17697,7 +17755,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we couldn't parse an expression at all, then we need to // bail out of the loop. - if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break; + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break; // If we successfully parsed a statement, then we are going to // need terminator to delimit them. 
@@ -17726,6 +17784,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); + PM_VALIDATE_NODE_TYPE(parser, statement, + PM_SPLAT_NODE, PM_IMPLICIT_REST_NODE, PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, + PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, PM_CONSTANT_TARGET_NODE, + PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, PM_MULTI_TARGET_NODE, PM_REQUIRED_PARAMETER_NODE); pm_multi_target_node_targets_append(parser, multi_target, statement); statement = UP(multi_target); @@ -18175,6 +18237,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); } + PM_VALIDATE_NODE_TYPE(parser, old_name, PM_BACK_REFERENCE_READ_NODE, PM_NUMBERED_REFERENCE_READ_NODE, PM_GLOBAL_VARIABLE_READ_NODE); return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name)); } case PM_SYMBOL_NODE: @@ -18182,9 +18245,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) { pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); } + + PM_VALIDATE_NODE_TYPE(parser, old_name, PM_SYMBOL_NODE, PM_INTERPOLATED_SYMBOL_NODE); } PRISM_FALLTHROUGH default: + PM_VALIDATE_NODE_TYPE(parser, old_name, PM_SYMBOL_NODE, PM_INTERPOLATED_SYMBOL_NODE); return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name)); } } @@ -18248,14 +18314,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression); pm_when_node_conditions_append(when_node, UP(splat_node)); - if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break; + if 
(PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; } else { pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1)); pm_when_node_conditions_append(when_node, condition); // If we found a missing node, then this is a syntax // error and we should stop looping. - if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break; + if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break; // If this is a string node, then we need to mark it // as frozen because when clause strings are frozen. @@ -18655,7 +18721,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_scope_pop(parser); pm_do_loop_stack_pop(parser); - if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { + PM_VALIDATE_NODE_TYPE(parser, constant_path, PM_CONSTANT_PATH_NODE, PM_CONSTANT_READ_NODE); + if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME); } @@ -19113,6 +19180,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword); + PM_VALIDATE_NODE_TYPE(parser, index, + PM_LOCAL_VARIABLE_TARGET_NODE, PM_INSTANCE_VARIABLE_TARGET_NODE, PM_CLASS_VARIABLE_TARGET_NODE, PM_GLOBAL_VARIABLE_TARGET_NODE, + PM_CONSTANT_TARGET_NODE, PM_CONSTANT_PATH_TARGET_NODE, PM_CALL_TARGET_NODE, PM_INDEX_TARGET_NODE, + PM_MULTI_TARGET_NODE); + return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous)); } case PM_TOKEN_KEYWORD_IF: @@ -19134,7 +19206,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_undef_node_t *undef = pm_undef_node_create(parser, 
&parser->previous); pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1)); - if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) { + if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) { pm_node_destroy(parser, name); } else { pm_undef_node_append(undef, name); @@ -19144,7 +19216,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); name = parse_undef_argument(parser, (uint16_t) (depth + 1)); - if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) { + if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) { pm_node_destroy(parser, name); break; } @@ -19218,7 +19290,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we can recover from a syntax error that occurred while parsing // the name of the module, then we'll handle that here. - if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) { + if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { pop_block_exits(parser, previous_block_exits); pm_node_list_free(&current_block_exits); @@ -19268,6 +19340,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); } + PM_VALIDATE_NODE_TYPE(parser, constant_path, PM_CONSTANT_PATH_NODE, PM_CONSTANT_READ_NODE); + pop_block_exits(parser, previous_block_exits); pm_node_list_free(&current_block_exits); @@ -20291,7 +20365,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1)); pm_array_node_elements_append(array, element); - if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break; + if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; parse_assignment_value_local(parser, element); } @@ -21584,7 +21658,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc pm_node_t *node = parse_expression_prefix(parser, binding_power, 
accepts_command_call, accepts_label, diag_id, depth); switch (PM_NODE_TYPE(node)) { - case PM_MISSING_NODE: + case PM_ERROR_RECOVERY_NODE: // If we found a syntax error, then the type of node returned by // parse_expression_prefix is going to be a missing node. return node; diff --git a/templates/lib/prism/dsl.rb.erb b/templates/lib/prism/dsl.rb.erb index e16ebb7110..463c7bb71e 100644 --- a/templates/lib/prism/dsl.rb.erb +++ b/templates/lib/prism/dsl.rb.erb @@ -127,7 +127,7 @@ module Prism # The default node that gets attached to nodes if no node is specified for a # required node field. def default_node(source, location) - MissingNode.new(source, -1, location, 0) + ErrorRecoveryNode.new(source, -1, location, 0, nil) end end end diff --git a/templates/template.rb b/templates/template.rb index 6c3efd7e6c..d5e49db824 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -8,7 +8,6 @@ module Prism module Template SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false) - REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby" @@ -432,13 +431,10 @@ def initialize(config, flags) when "pattern expression" # the list of all possible types is too long with 37+ different classes "Node" - when Hash - kind = kind.fetch("on error") - REMOVE_ON_ERROR_TYPES ? 
nil : kind else kind end - end.compact + end if kinds.size == 1 kinds = kinds.first kinds = nil if kinds == "Node" diff --git a/test/prism/result/error_recovery_test.rb b/test/prism/result/error_recovery_test.rb new file mode 100644 index 0000000000..0ce63bbb6a --- /dev/null +++ b/test/prism/result/error_recovery_test.rb @@ -0,0 +1,237 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module Prism + class ErrorRecoveryTest < TestCase + def test_alias_global_variable_node_old_name_symbol + result = Prism.parse("alias $a b") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.old_name + assert_kind_of SymbolNode, node.old_name.child + end + + def test_alias_global_variable_node_old_name_missing + result = Prism.parse("alias $a 42") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.old_name + assert_nil node.old_name.child + end + + def test_alias_method_node_old_name_global_variable + result = Prism.parse("alias a $b") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.old_name + assert_kind_of GlobalVariableReadNode, node.old_name.child + end + + def test_alias_method_node_old_name_missing + result = Prism.parse("alias a 42") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.old_name + assert_nil node.old_name.child + end + + def test_class_node_constant_path_call + result = Prism.parse("class 0.X; end") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.constant_path + assert_kind_of CallNode, node.constant_path.child + end + + def test_for_node_index_back_reference + result = Prism.parse("for $& in a; end") + refute result.success? 
+ + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.index + assert_kind_of BackReferenceReadNode, node.index.child + end + + def test_for_node_index_numbered_reference + result = Prism.parse("for $1 in a; end") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.index + assert_kind_of NumberedReferenceReadNode, node.index.child + end + + def test_for_node_index_missing + result = Prism.parse("for in 1..10; end") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.index + assert_nil node.index.child + end + + def test_interpolated_string_node_parts_xstring + result = Prism.parse("<<~`FOO` \"bar\"\nls\nFOO\n") + refute result.success? + + node = result.value.statements.body.first + assert node.parts.any? { |part| part.is_a?(ErrorRecoveryNode) && part.child.is_a?(XStringNode) } + end + + def test_interpolated_string_node_parts_interpolated_xstring + result = Prism.parse("<<~`FOO` \"bar\"\n\#{ls}\nFOO\n") + refute result.success? + + node = result.value.statements.body.first + assert node.parts.any? { |part| part.is_a?(ErrorRecoveryNode) && part.child.is_a?(InterpolatedXStringNode) } + end + + def test_module_node_constant_path_def + result = Prism.parse("module def foo; end") + refute result.success? + + node = result.value.statements.body.first + assert_kind_of ErrorRecoveryNode, node.constant_path + assert_kind_of DefNode, node.constant_path.child + end + + def test_module_node_constant_path_missing + result = Prism.parse("module Parent module end") + refute result.success? + + node = result.value.statements.body.first.body.body.first + assert_kind_of ErrorRecoveryNode, node.constant_path + assert_nil node.constant_path.child + end + + def test_multi_target_node_lefts_back_reference + result = Prism.parse("a, (b, $&) = z") + refute result.success? 
+ + node = result.value.statements.body.first.lefts.last + assert node.lefts.any? { |left| left.is_a?(ErrorRecoveryNode) && left.child.is_a?(BackReferenceReadNode) } + end + + def test_multi_target_node_lefts_numbered_reference + result = Prism.parse("a, (b, $1) = z") + refute result.success? + + node = result.value.statements.body.first.lefts.last + assert node.lefts.any? { |left| left.is_a?(ErrorRecoveryNode) && left.child.is_a?(NumberedReferenceReadNode) } + end + + def test_multi_target_node_rights_back_reference + result = Prism.parse("a, (*, $&) = z") + refute result.success? + + node = result.value.statements.body.first.lefts.last + assert node.rights.any? { |right| right.is_a?(ErrorRecoveryNode) && right.child.is_a?(BackReferenceReadNode) } + end + + def test_multi_target_node_rights_numbered_reference + result = Prism.parse("a, (*, $1) = z") + refute result.success? + + node = result.value.statements.body.first.lefts.last + assert node.rights.any? { |right| right.is_a?(ErrorRecoveryNode) && right.child.is_a?(NumberedReferenceReadNode) } + end + + def test_multi_write_node_lefts_back_reference + result = Prism.parse("$&, = z") + refute result.success? + + node = result.value.statements.body.first + assert node.lefts.any? { |left| left.is_a?(ErrorRecoveryNode) && left.child.is_a?(BackReferenceReadNode) } + end + + def test_multi_write_node_lefts_numbered_reference + result = Prism.parse("$1, = z") + refute result.success? + + node = result.value.statements.body.first + assert node.lefts.any? { |left| left.is_a?(ErrorRecoveryNode) && left.child.is_a?(NumberedReferenceReadNode) } + end + + def test_multi_write_node_rights_back_reference + result = Prism.parse("*, $& = z") + refute result.success? + + node = result.value.statements.body.first + assert node.rights.any? 
{ |right| right.is_a?(ErrorRecoveryNode) && right.child.is_a?(BackReferenceReadNode) } + end + + def test_multi_write_node_rights_numbered_reference + result = Prism.parse("*, $1 = z") + refute result.success? + + node = result.value.statements.body.first + assert node.rights.any? { |right| right.is_a?(ErrorRecoveryNode) && right.child.is_a?(NumberedReferenceReadNode) } + end + + def test_parameters_node_posts_keyword_rest + result = Prism.parse("def f(**kwargs, ...); end") + refute result.success? + + node = result.value.statements.body.first.parameters + assert node.posts.any? { |post| post.is_a?(ErrorRecoveryNode) && post.child.is_a?(KeywordRestParameterNode) } + end + + def test_parameters_node_posts_no_keywords + result = Prism.parse("def f(**nil, ...); end") + refute result.success? + + node = result.value.statements.body.first.parameters + assert node.posts.any? { |post| post.is_a?(ErrorRecoveryNode) && post.child.is_a?(NoKeywordsParameterNode) } + end + + def test_parameters_node_posts_forwarding + result = Prism.parse("def f(..., ...); end") + refute result.success? + + node = result.value.statements.body.first.parameters + assert node.posts.any? { |post| post.is_a?(ErrorRecoveryNode) && post.child.is_a?(ForwardingParameterNode) } + end + + def test_pinned_variable_node_variable_missing + result = Prism.parse("foo in ^Bar") + refute result.success? + + node = result.value.statements.body.first.pattern + assert_kind_of ErrorRecoveryNode, node.variable + assert_nil node.variable.child + end + + def test_rescue_node_reference_back_reference + result = Prism.parse("begin; rescue => $&; end") + refute result.success? + + node = result.value.statements.body.first.rescue_clause + assert_kind_of ErrorRecoveryNode, node.reference + assert_kind_of BackReferenceReadNode, node.reference.child + end + + def test_rescue_node_reference_numbered_reference + result = Prism.parse("begin; rescue => $1; end") + refute result.success? 
+ + node = result.value.statements.body.first.rescue_clause + assert_kind_of ErrorRecoveryNode, node.reference + assert_kind_of NumberedReferenceReadNode, node.reference.child + end + + def test_rescue_node_reference_missing + result = Prism.parse("begin; rescue =>; end") + refute result.success? + + node = result.value.statements.body.first.rescue_clause + assert_kind_of ErrorRecoveryNode, node.reference + assert_nil node.reference.child + end + end +end diff --git a/test/prism/result/source_location_test.rb b/test/prism/result/source_location_test.rb index 38b971d02b..4cdf895781 100644 --- a/test/prism/result/source_location_test.rb +++ b/test/prism/result/source_location_test.rb @@ -920,7 +920,7 @@ def test_YieldNode end def test_all_tested - expected = Prism.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode] + expected = Prism.constants.grep(/.Node$/).sort - %i[ErrorRecoveryNode ProgramNode] actual = SourceLocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort assert_equal expected, actual end