From 3fb4d741431533fe0e203fa3adf89f7e8928183b Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Sat, 20 Nov 2021 15:34:29 +0100 Subject: [PATCH 1/8] add parent token as caret symbol '^' --- src/paths/parser_node_visitor.rs | 2 +- src/paths/path_parser.rs | 45 +++++++++++++++++++++++++++++--- src/paths/tokenizer.rs | 4 +++ src/paths/tokens.rs | 5 ++++ src/selector/selector_impl.rs | 8 ++++++ 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/paths/parser_node_visitor.rs b/src/paths/parser_node_visitor.rs index ab35c1e3..ced7fbae 100644 --- a/src/paths/parser_node_visitor.rs +++ b/src/paths/parser_node_visitor.rs @@ -21,7 +21,7 @@ pub trait ParserNodeVisitor<'a> { | ParseToken::Bool(_) => { token_handler.handle(&parse_node.token, parse_value_reader); } - ParseToken::In | ParseToken::Leaves => { + ParseToken::In | ParseToken::Leaves | ParseToken::Parent => { if let Some(n) = &parse_node.left { self.visit(&*n, token_handler, parse_value_reader); } diff --git a/src/paths/path_parser.rs b/src/paths/path_parser.rs index 178e105d..5567bf37 100644 --- a/src/paths/path_parser.rs +++ b/src/paths/path_parser.rs @@ -86,6 +86,10 @@ impl<'a> ParserImpl<'a> { self.eat_token(); self.paths_dot(prev) } + Ok(Token::Caret(_)) => { + self.eat_token(); + self.paths_caret(prev) + } Ok(Token::OpenArray(_)) => { self.eat_token(); self.eat_whitespace(); @@ -98,12 +102,18 @@ impl<'a> ParserImpl<'a> { fn paths_dot(&mut self, prev: ParserNode) -> Result { debug!("#paths_dot"); - let node = self.path(prev)?; + let node = self.path_dot(prev)?; self.paths(node) } - fn path(&mut self, prev: ParserNode) -> Result { - debug!("#path"); + fn paths_caret(&mut self, prev: ParserNode) -> Result { + debug!("#paths_caret"); + let node = self.path_caret(prev)?; + self.paths(node) + } + + fn path_dot(&mut self, prev: ParserNode) -> Result { + debug!("#path_dot"); match self.token_reader.peek_token() { Ok(Token::Dot(_)) => self.path_leaves(prev), Ok(Token::Asterisk(_)) => self.path_in_all(prev), @@ -116,6 +126,25 @@ impl<'a> ParserImpl<'a> { } } + fn path_caret(&mut self, prev: ParserNode) -> Result { + debug!("#path_caret"); + match self.token_reader.peek_token() { + Ok(Token::Caret(_)) => { + Ok(ParserNode { + token: ParseToken::Parent, + left: Some(Box::new(prev)), + right: None, + }) + } + Ok(Token::Key(_)) => self.path_parent_key(prev), + Ok(Token::OpenArray(_)) => { + self.eat_token(); + self.array(prev) + } + _ => Err(self.token_reader.to_error()), + } + } + fn path_leaves(&mut self, prev: ParserNode) -> Result { debug!("#path_leaves"); self.eat_token(); @@ -172,6 +201,16 @@ impl<'a> ParserImpl<'a> { }) } + #[allow(clippy::unnecessary_wraps)] + fn path_parent_key(&mut self, prev: ParserNode) -> Result { + debug!("#path_parent_key"); + Ok(ParserNode { + token: ParseToken::Parent, + left: Some(Box::new(prev)), + right: Some(Box::new(self.key()?)), + }) + } + fn key(&mut self) -> Result { debug!("#key"); match self.token_reader.next_token() { diff --git a/src/paths/tokenizer.rs b/src/paths/tokenizer.rs index fd61c7e5..787768ce 100644 --- a/src/paths/tokenizer.rs +++ b/src/paths/tokenizer.rs @@ -5,6 +5,7 @@ use super::tokens::Token; const CH_DOLLA: char = '$'; const CH_DOT: char = '.'; +const CH_CARET: char = '^'; const CH_ASTERISK: char = '*'; const CH_LARRAY: char = '['; const CH_RARRAY: char = ']'; @@ -51,6 +52,7 @@ impl<'a> Tokenizer<'a> { fn dolla(&mut self) -> Result { let fun = |c: &char| match c { &CH_DOT + | &CH_CARET | &CH_ASTERISK | &CH_LARRAY | &CH_RARRAY @@ -177,6 +179,7 @@ impl<'a> Tokenizer<'a> { 
let fun = |c: &char| match c { &CH_DOLLA | &CH_DOT + | &CH_CARET | &CH_ASTERISK | &CH_LARRAY | &CH_RARRAY @@ -203,6 +206,7 @@ impl<'a> Tokenizer<'a> { match ch { CH_DOLLA => self.dolla(), CH_DOT => Ok(Token::Dot(span)), + CH_CARET => Ok(Token::Caret(span)), CH_ASTERISK => Ok(Token::Asterisk(span)), CH_LARRAY => Ok(Token::OpenArray(span)), CH_RARRAY => Ok(Token::CloseArray(span)), diff --git a/src/paths/tokens.rs b/src/paths/tokens.rs index 0d4cc919..91f34cbf 100644 --- a/src/paths/tokens.rs +++ b/src/paths/tokens.rs @@ -25,6 +25,7 @@ pub enum Token { And(StrRange), Or(StrRange), Whitespace(StrRange), + Caret(StrRange), } impl Token { @@ -53,6 +54,7 @@ impl Token { Token::And(_) => matches!(other, Token::And(_)), Token::Or(_) => matches!(other, Token::Or(_)), Token::Whitespace(_) => matches!(other, Token::Whitespace(_)), + Token::Caret(_) => matches!(other, Token::Caret(_)), } } @@ -81,6 +83,7 @@ impl Token { Token::And(_) => Token::And(new_span), Token::Or(_) => Token::Or(new_span), Token::Whitespace(_) => Token::Whitespace(new_span), + Token::Caret(_) => Token::Caret(new_span), } } } @@ -97,6 +100,8 @@ pub enum ParseToken { Leaves, // '*' All, + // '^' + Parent, Key(StrRange), Keys(Vec<StrRange>), diff --git a/src/selector/selector_impl.rs b/src/selector/selector_impl.rs index 84eb24be..ddcdc02d 100644 --- a/src/selector/selector_impl.rs +++ b/src/selector/selector_impl.rs @@ -440,6 +440,14 @@ impl<'a> ParserTokenHandler<'a> for JsonSelector<'a> { ParseToken::In | ParseToken::Leaves | ParseToken::Array => { self.tokens.push(token.clone()); } + ParseToken::Parent => { + //TODO retrieve parents from a Map &parent_value> + let mut vec:Vec<&Value> = Vec::new(); + self.current = Some(vec); + + //TODO push token only if last is not a parent already + self.tokens.push(token.clone()); + } ParseToken::ArrayEof => self.visit_array_eof(), ParseToken::All => self.visit_all(), ParseToken::Bool(b) => { From ac40e57e4699629e5a3503e684fd15e27d0d8641 Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Mon, 22 Nov 2021 18:35:06 +0100 Subject: [PATCH 2/8] add a hashmap to store the parents of all the visited nodes This hashmap needs to be propagated across the whole tree visit. For each node Value, its parent is saved in the hashmap where the key is the node Value pointer. When the parent operator is later evaluated, all the parent nodes of the current result vector are retrieved.
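As a standalone sketch of the mechanism (not code from this patch; it assumes only serde_json, and the function name index_parents and the toy document are illustrative): every child Value reached during a walk is recorded under its own pointer, so its parent can be looked up later when '^' is evaluated.

    use std::collections::HashMap;
    use serde_json::{json, Value};

    // Record, for every child reached during the walk, a pointer-keyed link
    // back to its parent. serde_json::Value keeps no parent links itself, so
    // the child's address serves as the lookup key.
    fn index_parents<'a>(value: &'a Value, parents: &mut HashMap<*const Value, &'a Value>) {
        let children: Vec<&'a Value> = match value {
            Value::Object(map) => map.values().collect(),
            Value::Array(items) => items.iter().collect(),
            _ => Vec::new(),
        };
        for child in children {
            // entry().or_insert keeps the first parent seen for a given address.
            parents.entry(child as *const Value).or_insert(value);
            index_parents(child, parents);
        }
    }

    fn main() {
        let doc = json!({ "store": { "bicycle": { "color": "red", "price": 19.95 } } });
        let mut parents: HashMap<*const Value, &Value> = HashMap::new();
        index_parents(&doc, &mut parents);

        // Resolving '^' on the "color" node yields the enclosing bicycle object.
        let color = &doc["store"]["bicycle"]["color"];
        let parent = parents[&(color as *const Value)];
        assert_eq!(parent["price"], json!(19.95));
    }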
--- src/selector/selector_impl.rs | 54 ++++++++++++------ src/selector/terms.rs | 62 ++++++++++---------- src/selector/value_walker.rs | 104 +++++++++++++++++++++++++--------- 3 files changed, 145 insertions(+), 75 deletions(-) diff --git a/src/selector/selector_impl.rs b/src/selector/selector_impl.rs index ddcdc02d..1647daa5 100644 --- a/src/selector/selector_impl.rs +++ b/src/selector/selector_impl.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::collections::HashMap; use std::rc::Rc; use serde_json::{Number, Value}; @@ -18,6 +19,7 @@ pub struct JsonSelector<'a> { current: Option>, selectors: Vec>, selector_filter: FilterTerms<'a>, + parents: HashMap<*const Value, &'a Value> } impl<'a> JsonSelector<'a> { @@ -29,6 +31,7 @@ impl<'a> JsonSelector<'a> { current: None, selectors: Vec::new(), selector_filter: FilterTerms(Vec::new()), + parents: HashMap::new() } } @@ -40,6 +43,7 @@ impl<'a> JsonSelector<'a> { current: None, selectors: Vec::new(), selector_filter: FilterTerms(Vec::new()), + parents: HashMap::new() } } @@ -156,6 +160,7 @@ impl<'a> JsonSelector<'a> { current: Some(vec![value]), selectors: Vec::new(), selector_filter: FilterTerms(Vec::new()), + parents: HashMap::new() }; self.selectors.push(selector); } @@ -172,18 +177,31 @@ impl<'a> JsonSelector<'a> { let array_token = self.tokens.pop(); if let Some(ParseToken::Leaves) = self.tokens.last() { self.tokens.pop(); - self.current = self.selector_filter.collect_all(self.current.take()); + self.current = self.selector_filter.collect_all(&mut self.parents, self.current.take()); } self.tokens.push(array_token.unwrap()); } self.selector_filter.new_filter_context(); } + fn visit_parent(&mut self) { + let mut vec:Vec<&Value> = Vec::new(); + if let Some(current) = &self.current { + for v in current { + let ptr = *v as *const Value; + if let Some(parent) = self.parents.get(&ptr) { + vec.push(parent); + } + } + } + self.current = Some(vec); + } + fn visit_array_eof(&mut self) { if self.is_last_before_token_match(ParseToken::Array) { if let Some(Some(e)) = self.selector_filter.pop_term() { if let ExprTerm::String(key) = e { - self.current = self.selector_filter.filter_next_with_str(self.current.take(), key); + self.current = self.selector_filter.filter_next_with_str(&mut self.parents, self.current.take(), key); self.tokens.pop(); return; } @@ -198,12 +216,12 @@ impl<'a> JsonSelector<'a> { if let Some(Some(e)) = self.selector_filter.pop_term() { let selector_filter_consumed = match e { ExprTerm::Number(n) => { - self.current = self.selector_filter.collect_all_with_num(self.current.take(), utils::to_f64(&n)); + self.current = self.selector_filter.collect_all_with_num(&mut self.parents, self.current.take(), utils::to_f64(&n)); self.selector_filter.pop_term(); true } ExprTerm::String(key) => { - self.current = self.selector_filter.collect_all_with_str(self.current.take(), key); + self.current = self.selector_filter.collect_all_with_str(&mut self.parents, self.current.take(), key); self.selector_filter.pop_term(); true } @@ -222,10 +240,10 @@ impl<'a> JsonSelector<'a> { if let Some(Some(e)) = self.selector_filter.pop_term() { match e { ExprTerm::Number(n) => { - self.current = self.selector_filter.collect_next_with_num(self.current.take(), utils::to_f64(&n)); + self.current = self.selector_filter.collect_next_with_num(&mut self.parents, self.current.take(), utils::to_f64(&n)); } ExprTerm::String(key) => { - self.current = self.selector_filter.collect_next_with_str(self.current.take(), &[key]); + self.current = 
self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), &[key]); } ExprTerm::Json(rel, _, v) => { if v.is_empty() { @@ -262,14 +280,14 @@ impl<'a> JsonSelector<'a> { match self.tokens.last() { Some(ParseToken::Leaves) => { self.tokens.pop(); - self.current = self.selector_filter.collect_all(self.current.take()); + self.current = self.selector_filter.collect_all(&mut self.parents, self.current.take()); } Some(ParseToken::In) => { self.tokens.pop(); - self.current = self.selector_filter.collect_next_all(self.current.take()); + self.current = self.selector_filter.collect_next_all(&mut self.parents, self.current.take()); } _ => { - self.current = self.selector_filter.collect_next_all(self.current.take()); + self.current = self.selector_filter.collect_next_all(&mut self.parents, self.current.take()); } } } @@ -284,20 +302,23 @@ impl<'a> JsonSelector<'a> { if self.selector_filter.is_term_empty() { match t { ParseToken::Leaves => { - self.current = self.selector_filter.collect_all_with_str(self.current.take(), key) + self.current = self.selector_filter.collect_all_with_str(&mut self.parents, self.current.take(), key) } ParseToken::In => { - self.current = self.selector_filter.collect_next_with_str(self.current.take(), &[key]) + self.current = self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), &[key]) + } + ParseToken::Parent => { + self.current = self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), &[key]) } _ => {} } } else { match t { ParseToken::Leaves => { - self.current = self.selector_filter.filter_all_with_str(self.current.take(), key); + self.current = self.selector_filter.filter_all_with_str(&mut self.parents, self.current.take(), key); } ParseToken::In => { - self.current = self.selector_filter.filter_next_with_str(self.current.take(), key); + self.current = self.selector_filter.filter_next_with_str(&mut self.parents, self.current.take(), key); } _ => {} } @@ -311,7 +332,7 @@ impl<'a> JsonSelector<'a> { } if let Some(ParseToken::Array) = self.tokens.pop() { - self.current = self.selector_filter.collect_next_with_str(self.current.take(), keys); + self.current = self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), keys); } else { unreachable!(); } @@ -441,12 +462,9 @@ impl<'a> ParserTokenHandler<'a> for JsonSelector<'a> { self.tokens.push(token.clone()); } ParseToken::Parent => { - //TODO retrieve parents from a Map &parent_value> - let mut vec:Vec<&Value> = Vec::new(); - self.current = Some(vec); - //TODO push token only if last is not a parent already self.tokens.push(token.clone()); + self.visit_parent() } ParseToken::ArrayEof => self.visit_array_eof(), ParseToken::All => self.visit_all(), diff --git a/src/selector/terms.rs b/src/selector/terms.rs index 3077e7a0..bb772fc2 100644 --- a/src/selector/terms.rs +++ b/src/selector/terms.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::collections::HashMap; use serde_json::{Number, Value}; @@ -335,18 +336,19 @@ impl<'a> FilterTerms<'a> { self.0.pop() } - fn filter_json_term(&mut self, e: ExprTerm<'a>, fun: F) + fn filter_json_term(&mut self, parents: &mut HashMap<*const Value, &'a Value>, e: ExprTerm<'a>, fun: F) where - F: Fn(&Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, + F: Fn(&mut HashMap<*const Value, &'a Value>, &Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, { debug!("filter_json_term: {:?}", e); if let ExprTerm::Json(rel, fk, vec) = e { let mut not_matched = Some(HashSet::new()); let filter_result 
= if let Some(FilterKey::String(key)) = fk { - fun(&ValueWalker::next_with_str(&vec, key), &mut not_matched) + let vec:Vec<&Value> = ValueWalker::next_with_str(parents, &vec, key); + fun(parents, &vec, &mut not_matched) } else { - fun(&vec, &mut not_matched) + fun(parents, &vec, &mut not_matched) }; if rel.is_some() { @@ -369,14 +371,14 @@ impl<'a> FilterTerms<'a> { } } - fn push_json_term(&mut self, current: Option>, fun: F) -> Option> + fn push_json_term(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, fun: F) -> Option> where - F: Fn(&Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, + F: Fn(&mut HashMap<*const Value, &'a Value>, &Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, { debug!("push_json_term: {:?}", ¤t); if let Some(current) = ¤t { - let filter_result = fun(current, &mut None); + let filter_result = fun(parents, current, &mut None); self.push_term(Some(ExprTerm::Json( None, Some(filter_result.key), @@ -386,28 +388,29 @@ impl<'a> FilterTerms<'a> { current } - fn filter(&mut self, current: Option>, fun: F) -> Option> + fn filter(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, fun: F) -> Option> where - F: Fn(&Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, + F: Fn(&mut HashMap<*const Value, &'a Value>, &Vec<&'a Value>, &mut Option>) -> FilterResult<'a>, { + println!("filter"); let peek = self.pop_term(); if let Some(None) = peek { - return self.push_json_term(current, fun); + return self.push_json_term(parents, current, fun); } if let Some(Some(e)) = peek { - self.filter_json_term(e, fun); + self.filter_json_term(parents, e, fun); } current } - pub fn filter_all_with_str(&mut self, current: Option>, key: &'a str) -> Option> { - let current = self.filter(current, |vec, _| { + pub fn filter_all_with_str(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, key: &'a str) -> Option> { + let current = self.filter(parents, current, |parents, vec, _| { FilterResult { key: FilterKey::All, - collected: ValueWalker::all_with_str(vec, key) + collected: ValueWalker::all_with_str(parents, vec, key) } }); @@ -415,14 +418,15 @@ impl<'a> FilterTerms<'a> { current } - pub fn filter_next_with_str(&mut self, current: Option>, key: &'a str) -> Option> { - let current = self.filter(current, |vec, not_matched| { + pub fn filter_next_with_str(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, key: &'a str) -> Option> { + let current = self.filter(parents, current, |parents, vec, not_matched| { let mut visited = HashSet::new(); let mut acc = Vec::new(); let path_key = &utils::to_path_str(key); - ValueWalker::walk_dedup_all(vec, + ValueWalker::walk_dedup_all(parents, + vec, path_key.get_key(), &mut visited, &mut |v| { @@ -446,7 +450,7 @@ impl<'a> FilterTerms<'a> { current } - pub fn collect_next_with_num(&mut self, current: Option>, index: f64) -> Option> { + pub fn collect_next_with_num(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, index: f64) -> Option> { if current.is_none() { debug!("collect_next_with_num : {:?}, {:?}", &index, ¤t); return current; @@ -473,7 +477,7 @@ impl<'a> FilterTerms<'a> { } } - let acc = ValueWalker::next_with_num(¤t.unwrap(), index); + let acc = ValueWalker::next_with_num(parents, ¤t.unwrap(), index); if acc.is_empty() { self.pop_term(); @@ -482,7 +486,7 @@ impl<'a> FilterTerms<'a> { Some(acc) } - pub fn collect_next_with_str(&mut self, current: Option>, keys: &[&'a str]) -> Option> { + pub fn collect_next_with_str(&mut self, 
parents: &mut HashMap<*const Value, &'a Value>, current: Option>, keys: &[&'a str]) -> Option> { if current.is_none() { debug!( "collect_next_with_str : {:?}, {:?}", @@ -491,7 +495,7 @@ impl<'a> FilterTerms<'a> { return current; } - let acc = ValueWalker::all_with_strs(current.as_ref().unwrap(), keys); + let acc = ValueWalker::all_with_strs(parents, current.as_ref().unwrap(), keys); if acc.is_empty() { self.pop_term(); @@ -500,37 +504,37 @@ impl<'a> FilterTerms<'a> { Some(acc) } - pub fn collect_next_all(&mut self, current: Option>) -> Option> { + pub fn collect_next_all(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>) -> Option> { if current.is_none() { debug!("collect_next_all : {:?}", ¤t); return current; } - Some(ValueWalker::next_all(¤t.unwrap())) + Some(ValueWalker::next_all(parents, ¤t.unwrap())) } - pub fn collect_all(&mut self, current: Option>) -> Option> { + pub fn collect_all(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>) -> Option> { if current.is_none() { debug!("collect_all: {:?}", ¤t); return current; } - Some(ValueWalker::all(current.as_ref().unwrap())) + Some(ValueWalker::all(parents, current.as_ref().unwrap())) } - pub fn collect_all_with_str(&mut self, current: Option>, key: &'a str) -> Option> { + pub fn collect_all_with_str(&mut self, parents: &mut HashMap<*const Value, &'a Value>, current: Option>, key: &'a str) -> Option> { if current.is_none() { debug!("collect_all_with_str: {}, {:?}", key, ¤t); return current; } - let ret = ValueWalker::all_with_str(current.as_ref().unwrap(), key); + let ret = ValueWalker::all_with_str(parents, current.as_ref().unwrap(), key); Some(ret) } - pub fn collect_all_with_num(&mut self, mut current: Option>, index: f64) -> Option> { + pub fn collect_all_with_num(&mut self, parents: &mut HashMap<*const Value, &'a Value>, mut current: Option>, index: f64) -> Option> { if let Some(current) = current.take() { - let ret = ValueWalker::all_with_num(¤t, index); + let ret = ValueWalker::all_with_num(parents, ¤t, index); if !ret.is_empty() { return Some(ret); } diff --git a/src/selector/value_walker.rs b/src/selector/value_walker.rs index 2c7c1039..101ec7a0 100644 --- a/src/selector/value_walker.rs +++ b/src/selector/value_walker.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::collections::HashMap; use serde_json::Value; use super::utils; @@ -7,41 +8,65 @@ use selector::utils::PathKey; pub(super) struct ValueWalker; impl<'a> ValueWalker { - pub fn next_all(vec: &[&'a Value]) -> Vec<&'a Value> { + pub fn next_all(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value]) -> Vec<&'a Value> { vec.iter().fold(Vec::new(), |mut acc, v| { match v { - Value::Object(map) => acc.extend(map.values()), - Value::Array(vec) => acc.extend(vec), + Value::Object(map) => { + map.values().into_iter().for_each(|v_el| { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + acc.push(v_el) + }) + }, + Value::Array(vec) => { + vec.iter().for_each(|v_el| { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + acc.push(v_el) + }) + } _ => {} } acc }) } - pub fn next_with_str(vec: &[&'a Value], key: &'a str) -> Vec<&'a Value> { + pub fn next_with_str(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], key: &'a str) -> Vec<&'a Value> { vec.iter().fold(Vec::new(), |mut acc, v| { if let Value::Object(map) = v { - if let Some(v) = map.get(key) { - acc.push(v); + if let 
Some(v_el) = map.get(key) { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + acc.push(v_el); } } acc }) } - pub fn next_with_num(vec: &[&'a Value], index: f64) -> Vec<&'a Value> { + pub fn next_with_num(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], index: f64) -> Vec<&'a Value> { vec.iter().fold(Vec::new(), |mut acc, v| { if let Value::Array(vec) = v { - if let Some(v) = vec.get(utils::abs_index(index as isize, vec.len())) { - acc.push(v); + if let Some(v_el) = vec.get(utils::abs_index(index as isize, vec.len())) { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + acc.push(v_el); } } acc }) } - pub fn all_with_num(vec: &[&'a Value], index: f64) -> Vec<&'a Value> { - Self::walk(vec, &|v, acc| { + pub fn all_with_num(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], index: f64) -> Vec<&'a Value> { + Self::walk(parents, vec, &|v, acc| { if v.is_array() { if let Some(v) = v.get(index as usize) { acc.push(v); @@ -50,29 +75,33 @@ impl<'a> ValueWalker { }) } - pub fn all_with_str(vec: &[&'a Value], key: &'a str) -> Vec<&'a Value> { + pub fn all_with_str(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], key: &'a str) -> Vec<&'a Value> { let path_key = utils::to_path_str(key); - Self::walk(vec, &|v, acc| if let Value::Object(map) = v { + Self::walk(parents, vec, &|v, acc| if let Value::Object(map) = v { if let Some(v) = map.get(path_key.get_key()) { acc.push(v); } }) } - pub fn all_with_strs(vec: &[&'a Value], keys: &[&'a str]) -> Vec<&'a Value> { + pub fn all_with_strs(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], keys: &[&'a str]) -> Vec<&'a Value> { let path_keys: &Vec = &keys.iter().map(|key| { utils::to_path_str(key) }).collect(); vec.iter().fold(Vec::new(), |mut acc, v| { if let Value::Object(map) = v { - path_keys.iter().for_each(|pk| if let Some(v) = map.get(pk.get_key()) { - acc.push(v) + path_keys.iter().for_each(|pk| if let Some(v_el) = map.get(pk.get_key()) { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + acc.push(v_el) }); } acc }) } - pub fn all(vec: &[&'a Value]) -> Vec<&'a Value> { - Self::walk(vec, &|v, acc| { + pub fn all(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value]) -> Vec<&'a Value> { + Self::walk(parents, vec, &|v, acc| { match v { Value::Array(ay) => acc.extend(ay), Value::Object(map) => { @@ -83,17 +112,17 @@ impl<'a> ValueWalker { }) } - fn walk(vec: &[&'a Value], fun: &F) -> Vec<&'a Value> + fn walk(parents: &mut HashMap<*const Value, &'a Value>, vec: &[&'a Value], fun: &F) -> Vec<&'a Value> where F: Fn(&'a Value, &mut Vec<&'a Value>), { vec.iter().fold(Vec::new(), |mut acc, v| { - Self::_walk(v, &mut acc, fun); + Self::_walk(parents, v, &mut acc, fun); acc }) } - fn _walk(v: &'a Value, acc: &mut Vec<&'a Value>, fun: &F) + fn _walk(parents: &mut HashMap<*const Value, &'a Value>, v: &'a Value, acc: &mut Vec<&'a Value>, fun: &F) where F: Fn(&'a Value, &mut Vec<&'a Value>), { @@ -101,16 +130,29 @@ impl<'a> ValueWalker { match v { Value::Array(vec) => { - vec.iter().for_each(|v| Self::_walk(v, acc, fun)); + vec.iter().for_each(|v_el| { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + Self::_walk(parents, v_el, acc, fun) + }); } Value::Object(map) => { - map.values().into_iter().for_each(|v| Self::_walk(v, acc, fun)); + 
map.values().into_iter().for_each(|v_el| { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + Self::_walk(parents, v_el, acc, fun) + }); } _ => {} } } - pub fn walk_dedup_all(vec: &[&'a Value], + pub fn walk_dedup_all(parents: &mut HashMap<*const Value, &'a Value>, + vec: &[&'a Value], key: &str, visited: &mut HashSet<*const Value>, is_contain: &mut F1, @@ -120,7 +162,8 @@ impl<'a> ValueWalker { F1: FnMut(&'a Value), F2: FnMut(usize), { - vec.iter().enumerate().for_each(|(index, v)| Self::walk_dedup(v, + vec.iter().enumerate().for_each(|(index, v)| Self::walk_dedup(parents, + v, key, visited, index, @@ -129,7 +172,8 @@ impl<'a> ValueWalker { depth)); } - fn walk_dedup(v: &'a Value, + fn walk_dedup(parents: &mut HashMap<*const Value, &'a Value>, + v: &'a Value, key: &str, visited: &mut HashSet<*const Value>, index: usize, @@ -161,8 +205,12 @@ impl<'a> ValueWalker { if depth == 0 { is_not_contain(index); } - vec.iter().for_each(|v| { - Self::walk_dedup(v, key, visited, index, is_contain, is_not_contain, depth + 1); + vec.iter().for_each(|v_el| { + let ptr_el = v_el as *const Value; + if !parents.contains_key(&ptr_el) { + parents.insert(ptr_el, v); + } + Self::walk_dedup(parents, v_el, key, visited, index, is_contain, is_not_contain, depth + 1); }) } _ => { From d3cbeec0b96364697a17fc291a801c285816627a Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Mon, 22 Nov 2021 22:19:09 +0100 Subject: [PATCH 3/8] parser: clean parent_parent case, add parent_all case --- src/paths/path_parser.rs | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/paths/path_parser.rs b/src/paths/path_parser.rs index 5567bf37..cd0c962e 100644 --- a/src/paths/path_parser.rs +++ b/src/paths/path_parser.rs @@ -129,13 +129,8 @@ impl<'a> ParserImpl<'a> { fn path_caret(&mut self, prev: ParserNode) -> Result { debug!("#path_caret"); match self.token_reader.peek_token() { - Ok(Token::Caret(_)) => { - Ok(ParserNode { - token: ParseToken::Parent, - left: Some(Box::new(prev)), - right: None, - }) - } + Ok(Token::Caret(_)) => self.path_parent_parent(prev), + Ok(Token::Asterisk(_)) => self.path_parent_all(prev), Ok(Token::Key(_)) => self.path_parent_key(prev), Ok(Token::OpenArray(_)) => { self.eat_token(); @@ -201,6 +196,27 @@ impl<'a> ParserImpl<'a> { }) } + #[allow(clippy::unnecessary_wraps)] + fn path_parent_all(&mut self, prev: ParserNode) -> Result { + debug!("#path_parent_key"); + self.eat_token(); + Ok(ParserNode { + token: ParseToken::Parent, + left: Some(Box::new(prev)), + right: Some(Box::new(self.create_node(ParseToken::All))), + }) + } + + #[allow(clippy::unnecessary_wraps)] + fn path_parent_parent(&mut self, prev: ParserNode) -> Result { + debug!("#path_parent_parent"); + Ok(ParserNode { + token: ParseToken::Parent, + left: Some(Box::new(prev)), + right: None, + }) + } + #[allow(clippy::unnecessary_wraps)] fn path_parent_key(&mut self, prev: ParserNode) -> Result { debug!("#path_parent_key"); From 7f8ffbabf351577a80fe4ca7c1886d694fe07cc1 Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Mon, 22 Nov 2021 22:21:58 +0100 Subject: [PATCH 4/8] better handle parent token in selector --- src/selector/selector_impl.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/selector/selector_impl.rs b/src/selector/selector_impl.rs index 1647daa5..6c53e25b 100644 --- a/src/selector/selector_impl.rs +++ b/src/selector/selector_impl.rs @@ -282,7 +282,7 @@ impl<'a> JsonSelector<'a> { 
self.tokens.pop(); self.current = self.selector_filter.collect_all(&mut self.parents, self.current.take()); } - Some(ParseToken::In) => { + Some(ParseToken::In) | Some(ParseToken::Parent) => { self.tokens.pop(); self.current = self.selector_filter.collect_next_all(&mut self.parents, self.current.take()); } @@ -304,10 +304,7 @@ impl<'a> JsonSelector<'a> { ParseToken::Leaves => { self.current = self.selector_filter.collect_all_with_str(&mut self.parents, self.current.take(), key) } - ParseToken::In => { - self.current = self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), &[key]) - } - ParseToken::Parent => { + ParseToken::In | ParseToken::Parent => { self.current = self.selector_filter.collect_next_with_str(&mut self.parents, self.current.take(), &[key]) } _ => {} @@ -462,8 +459,10 @@ impl<'a> ParserTokenHandler<'a> for JsonSelector<'a> { self.tokens.push(token.clone()); } ParseToken::Parent => { - //TODO push token only if last is not a parent already - self.tokens.push(token.clone()); + match self.tokens.last() { + Some(ParseToken::Parent) => {} + _ => self.tokens.push(token.clone()) + } self.visit_parent() } ParseToken::ArrayEof => self.visit_array_eof(), From fcec170e6f8ac5e28faf48a66a913003cfe48dc4 Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Mon, 22 Nov 2021 23:06:34 +0100 Subject: [PATCH 5/8] add test case for parent operator --- benchmark/example_deeper.json | 77 +++++++++++++++++++++++++++++++++++ tests/jsonpath_examples.rs | 14 +++++++ 2 files changed, 91 insertions(+) create mode 100644 benchmark/example_deeper.json diff --git a/benchmark/example_deeper.json b/benchmark/example_deeper.json new file mode 100644 index 00000000..8b30c18a --- /dev/null +++ b/benchmark/example_deeper.json @@ -0,0 +1,77 @@ +{ + "store": { + "book": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95, + "reviews": [ + { + "vote": 3.2, + "user": "Angela" + }, + { + "vote": 3.5, + "user": "Eric" + } + ] + }, + { + "category": "fiction", + "author": "Evelyn Waugh", + "title": "Sword of Honour", + "price": 12.99, + "reviews": [ + { + "vote": 5.0, + "user": "Ruth" + }, + { + "vote": 2.0, + "user": "Philip" + } + ] + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99, + "reviews": [ + { + "vote": 4.0, + "user": "Carol" + } + ] + }, + { + "category": "fiction", + "author": "J. R. R. 
Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99, + "reviews": [ + { + "vote": 5.0, + "user": "Isaac" + }, + { + "vote": 4.8, + "user": "Chris" + }, + { + "vote": 4.3, + "user": "Frank" + } + ] + } + ], + "bicycle": { + "color": "red", + "price": 19.95 + } + }, + "expensive": 10 +} diff --git a/tests/jsonpath_examples.rs b/tests/jsonpath_examples.rs index 35e84a3d..b446acf4 100644 --- a/tests/jsonpath_examples.rs +++ b/tests/jsonpath_examples.rs @@ -240,3 +240,17 @@ fn give_me_every_thing() { read_json("./benchmark/giveme_every_thing_result.json"), ); } + +#[test] +fn all_titles_of_books_in_store_ranked_as_5() { + setup(); + + select_and_then_compare( + r#"$.store.book[*].reviews[?(@.vote == 5)]^^title"#, + read_json("./benchmark/example_deeper.json"), + json!([ + "Sword of Honour", + "The Lord of the Rings", + ]), + ); +} \ No newline at end of file From 88478f5bde8c999f38da0801278e87a8edd32fc4 Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Tue, 30 Nov 2021 22:36:00 +0100 Subject: [PATCH 6/8] fix parent_array token sequence in path parser --- src/paths/path_parser.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/paths/path_parser.rs b/src/paths/path_parser.rs index cd0c962e..9e0d515c 100644 --- a/src/paths/path_parser.rs +++ b/src/paths/path_parser.rs @@ -132,10 +132,7 @@ impl<'a> ParserImpl<'a> { Ok(Token::Caret(_)) => self.path_parent_parent(prev), Ok(Token::Asterisk(_)) => self.path_parent_all(prev), Ok(Token::Key(_)) => self.path_parent_key(prev), - Ok(Token::OpenArray(_)) => { - self.eat_token(); - self.array(prev) - } + Ok(Token::OpenArray(_)) => self.path_parent_array(prev), _ => Err(self.token_reader.to_error()), } } @@ -227,6 +224,18 @@ impl<'a> ParserImpl<'a> { }) } + #[allow(clippy::unnecessary_wraps)] + fn path_parent_array(&mut self, prev: ParserNode) -> Result { + debug!("#path_parent_array"); + self.eat_token(); + let prev = ParserNode { + token: ParseToken::Parent, + left: Some(Box::new(prev)), + right: None, + }; + self.array(prev) + } + fn key(&mut self) -> Result { debug!("#key"); match self.token_reader.next_token() { From ed02dce3963c7857f5962fd2005a6b97fef517ef Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Tue, 30 Nov 2021 22:42:00 +0100 Subject: [PATCH 7/8] add additional tests for parent operator --- tests/parents.rs | 162 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 tests/parents.rs diff --git a/tests/parents.rs b/tests/parents.rs new file mode 100644 index 00000000..ab5cc3f1 --- /dev/null +++ b/tests/parents.rs @@ -0,0 +1,162 @@ +#[macro_use] +extern crate serde_json; + +use common::{read_json, select_and_then_compare, setup}; + +mod common; + +#[test] +fn parent_of_root() { + setup(); + + for path in &[r#"$^*"#, r#"$^^*"#, r#"$^*^*", r#"$.first^^*"#] { + select_and_then_compare( + path, + json!({"first":"value"}), + json!([]), + ); + } +} + +#[test] +fn parent_key() { + setup(); + + for path in &[r#"$.store^expensive"#, r#"$.store.bicycle^^expensive"#] { + select_and_then_compare( + path, + read_json("./benchmark/example.json"), + json!([10]), + ); + } +} + +#[test] +fn parent_parent() { + setup(); + + select_and_then_compare( + r#"$.store.bicycle^^expensive"#, + read_json("./benchmark/example.json"), + json!([ + 10 + ]) + ); + + select_and_then_compare( + r#"$.store.book[0].author^^^bicycle"#, + read_json("./benchmark/example.json"), + json!([ + { + "color": "red", + "price": 19.95 + } + ]) + ); +} + +#[test] +fn 
parent_array() { + setup(); + + select_and_then_compare( + r#"$.store.book[1]^[0]"#, + read_json("./benchmark/example.json"), + json!([ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + } + ]) + ); + + select_and_then_compare( + r#"$.store.book[*]^[0]"#, + read_json("./benchmark/example.json"), + json!([ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + } + ]) + ); +} + +#[test] +fn parent_all() { + setup(); + + select_and_then_compare( + r#"$.store.bicycle.color^*"#, + read_json("./benchmark/example.json"), + json!([ + "red", + 19.95 + ]) + ); + + select_and_then_compare( + r#"$.store.book[0].category^^*.author"#, + read_json("./benchmark/example.json"), + json!([ + "Nigel Rees", + "Evelyn Waugh", + "Herman Melville", + "J. R. R. Tolkien" + ]) + ); +} + +#[test] +fn parent_after_leaves() { + setup(); + + select_and_then_compare( + r#"$..author^title"#, + read_json("./benchmark/example.json"), + json!([ + "Sayings of the Century", + "Sword of Honour", + "Moby Dick", + "The Lord of the Rings" + ]) + ); +} + +#[test] +fn parent_after_filter() { + setup(); + + select_and_then_compare( + "$.store.book[?(@.price == 12.99)]^^bicycle", + read_json("./benchmark/example.json"), + json!([ + { + "color": "red", + "price": 19.95 + } + ]) + ); +} From d6413f02985a739e5f353463a4ce16714a7cfee8 Mon Sep 17 00:00:00 2001 From: matteomartelli Date: Wed, 1 Dec 2021 00:01:47 +0100 Subject: [PATCH 8/8] use entry().or_insert instead of contain_key();insert() this was suggested by clippy --- src/selector/value_walker.rs | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/selector/value_walker.rs b/src/selector/value_walker.rs index 101ec7a0..ea452826 100644 --- a/src/selector/value_walker.rs +++ b/src/selector/value_walker.rs @@ -14,18 +14,14 @@ impl<'a> ValueWalker { Value::Object(map) => { map.values().into_iter().for_each(|v_el| { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); acc.push(v_el) }) }, Value::Array(vec) => { vec.iter().for_each(|v_el| { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); acc.push(v_el) }) } @@ -40,9 +36,7 @@ impl<'a> ValueWalker { if let Value::Object(map) = v { if let Some(v_el) = map.get(key) { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); acc.push(v_el); } } @@ -55,9 +49,7 @@ impl<'a> ValueWalker { if let Value::Array(vec) = v { if let Some(v_el) = vec.get(utils::abs_index(index as isize, vec.len())) { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); acc.push(v_el); } } @@ -90,9 +82,7 @@ impl<'a> ValueWalker { if let Value::Object(map) = v { path_keys.iter().for_each(|pk| if let Some(v_el) = map.get(pk.get_key()) { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - 
parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); acc.push(v_el) }); } @@ -132,18 +122,14 @@ impl<'a> ValueWalker { Value::Array(vec) => { vec.iter().for_each(|v_el| { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); Self::_walk(parents, v_el, acc, fun) }); } Value::Object(map) => { map.values().into_iter().for_each(|v_el| { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); Self::_walk(parents, v_el, acc, fun) }); } @@ -207,9 +193,7 @@ impl<'a> ValueWalker { } vec.iter().for_each(|v_el| { let ptr_el = v_el as *const Value; - if !parents.contains_key(&ptr_el) { - parents.insert(ptr_el, v); - } + parents.entry(ptr_el).or_insert(v); Self::walk_dedup(parents, v_el, key, visited, index, is_contain, is_not_contain, depth + 1); }) }
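Taken together, the series makes paths such as $..author^title or $.store.book[*].reviews[?(@.vote == 5)]^^title resolvable, as exercised by the new tests. A minimal usage sketch mirroring those tests (assuming the crate's existing top-level jsonpath_lib::select helper keeps its current signature; the document below is made up for illustration):

    use serde_json::json;

    fn main() {
        // Toy data shaped like the reviews fixture added in benchmark/example_deeper.json.
        let store = json!({
            "book": [
                { "title": "Moby Dick", "reviews": [ { "vote": 5.0, "user": "Carol" } ] },
                { "title": "Sword of Honour", "reviews": [ { "vote": 2.0, "user": "Philip" } ] }
            ]
        });

        // Each matching review resolves to its book via two '^' steps, then to the title.
        let titles = jsonpath_lib::select(&store, "$.book[*].reviews[?(@.vote == 5)]^^title")
            .expect("valid path");
        assert_eq!(titles, vec![&json!("Moby Dick")]);
    }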