Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 50 additions & 28 deletions rusty_lr_core/src/builder/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use super::States;
use crate::hash::HashMap;
use crate::rule::*;
use crate::token::Token;
use crate::TerminalSymbol;
use crate::TriState;

/// struct that holding pre-calculated information for `expand()` function.
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -51,7 +53,7 @@ pub struct Grammar<Term, NonTerm> {
pub precedence_types: Vec<Option<ReduceType>>,
}

impl<Term, NonTerm> Grammar<Term, NonTerm> {
impl<Term, NonTerm> Grammar<TerminalSymbol<Term>, NonTerm> {
pub fn new() -> Self {
Grammar {
rules: Vec::new(),
Expand All @@ -68,8 +70,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
pub fn add_rule(
&mut self,
name: NonTerm,
rule: Vec<Token<Term, NonTerm>>,
lookaheads: Option<BTreeSet<Term>>,
rule: Vec<Token<TerminalSymbol<Term>, NonTerm>>,
lookaheads: Option<BTreeSet<TerminalSymbol<Term>>>,
precedence: Option<Precedence>,
priority: usize,
) -> usize
Expand All @@ -92,7 +94,7 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
}

/// false if precedence already exists and different
pub fn add_precedence(&mut self, term: Term, level: usize) -> bool
pub fn add_precedence(&mut self, term: TerminalSymbol<Term>, level: usize) -> bool
where
Term: Hash + Eq,
{
Expand All @@ -110,7 +112,10 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
}

/// search for every production rules with name 'name'
fn search_rules(&self, name: NonTerm) -> Result<&[usize], BuildError<Term, NonTerm>>
fn search_rules(
&self,
name: NonTerm,
) -> Result<&[usize], BuildError<TerminalSymbol<Term>, NonTerm>>
where
NonTerm: Hash + Eq,
{
Expand Down Expand Up @@ -187,14 +192,14 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
}

/// pre calculate the information for `expand()` function.
fn calculate_expand_cache(&mut self) -> Result<(), BuildError<Term, NonTerm>>
fn calculate_expand_cache(&mut self) -> Result<(), BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Ord + Copy,
NonTerm: Hash + Eq + Copy,
{
let mut pong: Vec<ExpandCache<Term>> = Vec::new();
let mut pong: Vec<ExpandCache<TerminalSymbol<Term>>> = Vec::new();
for (nonterm, nonterm_rules) in self.rules_map.iter() {
let mut rules: BTreeMap<usize, ExpandCache<Term>> = nonterm_rules
let mut rules: BTreeMap<usize, ExpandCache<TerminalSymbol<Term>>> = nonterm_rules
.iter()
.map(|rule| {
(
Expand Down Expand Up @@ -265,9 +270,9 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
/// 1st `bool` of returned tuple is true if follow_tokens can be empty.
fn lookahead(
&self,
follow_tokens: &[Token<Term, NonTerm>],
lookaheads: &BTreeSet<Term>,
) -> Result<(BTreeSet<Term>, bool), BuildError<Term, NonTerm>>
follow_tokens: &[Token<TerminalSymbol<Term>, NonTerm>],
lookaheads: &BTreeSet<TerminalSymbol<Term>>,
) -> Result<(BTreeSet<TerminalSymbol<Term>>, bool), BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Ord + Copy,
NonTerm: Copy + Hash + Eq,
Expand Down Expand Up @@ -298,7 +303,10 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {

/// for given set of production rules,
/// if the first token of each rule is nonterminal, attach its production rules
fn expand(&self, rules: &mut LookaheadRuleRefSet<Term>) -> Result<(), BuildError<Term, NonTerm>>
fn expand(
&self,
rules: &mut LookaheadRuleRefSet<TerminalSymbol<Term>>,
) -> Result<(), BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Copy + Ord,
NonTerm: Copy + Hash + Eq,
Expand Down Expand Up @@ -390,8 +398,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
/// check for any shift/reduce or reduce/reduce conflicts and report them to `diags`.
fn check_conflicts(
&self,
states: &[State<Term, NonTerm>],
diags: &mut DiagnosticCollector<Term>,
states: &[State<TerminalSymbol<Term>, NonTerm>],
diags: &mut DiagnosticCollector<TerminalSymbol<Term>>,
) where
Term: Ord + Copy + Hash,
NonTerm: Copy + PartialEq + Ord,
Expand Down Expand Up @@ -487,8 +495,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
fn build_full(
&mut self,
augmented_name: NonTerm,
diags: &mut DiagnosticCollector<Term>,
) -> Result<States<Term, NonTerm>, BuildError<Term, NonTerm>>
diags: &mut DiagnosticCollector<TerminalSymbol<Term>>,
) -> Result<States<TerminalSymbol<Term>, NonTerm>, BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Copy + Ord + Hash,
NonTerm: Copy + Hash + Ord,
Expand Down Expand Up @@ -525,15 +533,25 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
panic!("main state is not 0");
}

for state in &mut states {
if state
.shift_goto_map_term
.contains_key(&TerminalSymbol::Error)
|| state.reduce_map.contains_key(&TerminalSymbol::Error)
{
state.can_accept_error = TriState::True;
}
}

Ok(States { states })
}

/// build minimal-LR(1) parser table from given grammar
pub fn build(
&mut self,
augmented_name: NonTerm,
diags: &mut DiagnosticCollector<Term>,
) -> Result<States<Term, NonTerm>, BuildError<Term, NonTerm>>
diags: &mut DiagnosticCollector<TerminalSymbol<Term>>,
) -> Result<States<TerminalSymbol<Term>, NonTerm>, BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Copy + Ord + Hash,
NonTerm: Copy + Hash + Ord,
Expand Down Expand Up @@ -662,6 +680,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
.shift_goto_map_nonterm
.append(&mut state_b.shift_goto_map_nonterm);
state_a.reduce_map.append(&mut state_b.reduce_map);
state_a.can_accept_error =
state_a.can_accept_error | state_b.can_accept_error;
merged = true;
}
}
Expand Down Expand Up @@ -716,8 +736,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
pub fn build_lalr(
&mut self,
augmented_name: NonTerm,
diags: &mut DiagnosticCollector<Term>,
) -> Result<States<Term, NonTerm>, BuildError<Term, NonTerm>>
diags: &mut DiagnosticCollector<TerminalSymbol<Term>>,
) -> Result<States<TerminalSymbol<Term>, NonTerm>, BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Copy + Ord + Hash,
NonTerm: Copy + Hash + Ord,
Expand Down Expand Up @@ -757,6 +777,8 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
.or_default()
.append(&mut reduce_rules);
}
states[state_a].can_accept_error =
states[state_a].can_accept_error | state_b.can_accept_error;
}
}
let mut new_states = Vec::with_capacity(states.len() - merge_into.len());
Expand Down Expand Up @@ -927,11 +949,11 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
/// build new state with given production rules
fn build_recursive(
&self,
mut rules: LookaheadRuleRefSet<Term>,
states: &mut Vec<State<Term, NonTerm>>,
state_map: &mut BTreeMap<LookaheadRuleRefSet<Term>, usize>,
diags: &mut DiagnosticCollector<Term>,
) -> Result<usize, BuildError<Term, NonTerm>>
mut rules: LookaheadRuleRefSet<TerminalSymbol<Term>>,
states: &mut Vec<State<TerminalSymbol<Term>, NonTerm>>,
state_map: &mut BTreeMap<LookaheadRuleRefSet<TerminalSymbol<Term>>, usize>,
diags: &mut DiagnosticCollector<TerminalSymbol<Term>>,
) -> Result<usize, BuildError<TerminalSymbol<Term>, NonTerm>>
where
Term: Hash + Ord + Copy,
NonTerm: Hash + Ord + Copy,
Expand All @@ -954,7 +976,7 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
// we don't care about the conflicts here
let mut next_rules_term = BTreeMap::new();
let mut next_rules_nonterm = BTreeMap::new();
let mut reduce_map: BTreeMap<Term, BTreeSet<usize>> = BTreeMap::new();
let mut reduce_map: BTreeMap<TerminalSymbol<Term>, BTreeSet<usize>> = BTreeMap::new();
for (mut rule_ref, lookaheads) in rules.rules.into_iter() {
let rule = &self.rules[rule_ref.rule].rule;
match rule.rule.get(rule_ref.shifted) {
Expand Down Expand Up @@ -1118,7 +1140,7 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {

// process rules that no more tokens left to shift
// if next token is one of lookahead, add reduce action
// if there are multiple recude rules for same lookahead, it is a reduce/reduce conflict
// if there are multiple reduce rules for same lookahead, it is a reduce/reduce conflict
// reduce_type conflict resolving
for (lookahead, reduce_rules) in reduce_map.into_iter() {
let state = &mut states[state_id];
Expand Down Expand Up @@ -1160,7 +1182,7 @@ impl<Term, NonTerm> Grammar<Term, NonTerm> {
// }
// }

impl<Term, NonTerm> Default for Grammar<Term, NonTerm> {
impl<Term, NonTerm> Default for Grammar<TerminalSymbol<Term>, NonTerm> {
fn default() -> Self {
Self::new()
}
Expand Down
5 changes: 5 additions & 0 deletions rusty_lr_core/src/builder/state.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use std::collections::BTreeMap;
use std::collections::BTreeSet;

use crate::TriState;

/// state for internal usage during grammar building stage
#[derive(Debug, Clone)]
pub struct State<Term, NonTerm> {
pub shift_goto_map_term: BTreeMap<Term, usize>,
pub shift_goto_map_nonterm: BTreeMap<NonTerm, usize>,
pub reduce_map: BTreeMap<Term, BTreeSet<usize>>,
pub ruleset: BTreeSet<crate::rule::ShiftedRuleRef>,
pub can_accept_error: TriState,
}
impl<Term, NonTerm> State<Term, NonTerm> {
pub fn new() -> Self {
Expand All @@ -16,6 +19,7 @@ impl<Term, NonTerm> State<Term, NonTerm> {
shift_goto_map_nonterm: Default::default(),
reduce_map: Default::default(),
ruleset: Default::default(),
can_accept_error: TriState::False,
}
}

Expand Down Expand Up @@ -79,6 +83,7 @@ where
.map(|(term, rules)| (term, rules.into_iter().collect()))
.collect(),
ruleset: state.ruleset.into_iter().collect(),
can_accept_error: state.can_accept_error,
}
}
}
19 changes: 19 additions & 0 deletions rusty_lr_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,22 @@ impl std::error::Error for DefaultReduceActionError {
"Default reduce action error"
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TriState {
False,
Maybe,
True,
}
impl std::ops::BitOr for TriState {
type Output = Self;
fn bitor(self, rhs: Self) -> Self::Output {
match (self, rhs) {
(TriState::False, TriState::False) => TriState::False,
(TriState::False, _) => TriState::Maybe,
(_, TriState::False) => TriState::Maybe,
(TriState::True, TriState::True) => TriState::True,
_ => TriState::Maybe,
}
Comment on lines +62 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The bitor implementation for TriState seems incorrect. When merging two states, if one of them can definitely accept an error (True), the merged state should also be True. The current logic can result in Maybe in cases like True | False, which is incorrect. This can lead to less efficient error recovery, as it might trigger expensive checks unnecessarily, and could potentially lead to recovery failures.

The logic should prioritize True over Maybe, and Maybe over False. Here is a corrected implementation:

        match (self, rhs) {
            (TriState::True, _) | (_, TriState::True) => TriState::True,
            (TriState::Maybe, _) | (_, TriState::Maybe) => TriState::Maybe,
            (TriState::False, TriState::False) => TriState::False,
        }

}
}
Loading