diff --git a/src/queryselector/parser.rs b/src/queryselector/parser.rs index 26a8aa2..a8df5f0 100644 --- a/src/queryselector/parser.rs +++ b/src/queryselector/parser.rs @@ -25,12 +25,17 @@ impl<'a> Parser<'a> { has_whitespace } + #[inline] fn read_identifier(&mut self) -> &'a [u8] { + self.read_while(util::is_ident) + } + + fn read_while(&mut self, mut predicate: impl FnMut(u8) -> bool) -> &'a [u8] { let start = self.stream.idx; while !self.stream.is_eof() { - let is_ident = self.stream.current().copied().map_or(false, util::is_ident); - if !is_ident { + let matched = self.stream.current().copied().map_or(false, &mut predicate); + if !matched { break; } else { self.stream.advance(); @@ -84,11 +89,13 @@ impl<'a> Parser<'a> { Some(b'=') => { self.stream.advance(); let quote = self.stream.expect_oneof_and_skip(&[b'"', b'\'']); - let value = self.read_identifier(); - if let Some(quote) = quote { - // Only require the given quote if the value starts with a quote + let value = if let Some(quote) = quote { + let value = self.read_while(|c| c != quote); self.stream.expect_and_skip(quote)?; - } + value + } else { + self.read_identifier() + }; self.stream.expect_and_skip(b']')?; Selector::AttributeValue(attribute, value) } @@ -96,11 +103,13 @@ impl<'a> Parser<'a> { self.stream.advance(); self.stream.expect_and_skip(b'=')?; let quote = self.stream.expect_oneof_and_skip(&[b'"', b'\'']); - let value = self.read_identifier(); - if let Some(quote) = quote { - // Only require the given quote if the value starts with a quote + let value = if let Some(quote) = quote { + let value = self.read_while(|c| c != quote); self.stream.expect_and_skip(quote)?; - } + value + } else { + self.read_identifier() + }; self.stream.expect_and_skip(b']')?; match c { b'~' => Selector::AttributeValueWhitespacedContains(attribute, value), diff --git a/src/tests.rs b/src/tests.rs index 046861a..2120b6b 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -585,11 +585,11 @@ mod query_selector { #[test] fn query_selector_with_quote() { - let input = r#"
"#; + let input = r#"
"#; let dom = parse(input, ParserOptions::default()).unwrap(); let parser = dom.parser(); let node_option = dom - .query_selector(r#"meta[property="og:title"]"#) + .query_selector(r#"meta[property="og:'title'"]"#) .and_then(|mut iter| iter.next()); let value = if let Some(node) = node_option { Some( diff --git a/src/util.rs b/src/util.rs index 15913eb..451ea48 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,9 +5,6 @@ pub fn is_ident(c: u8) -> bool { || (b'a'..=b'z').contains(&c) || c == b'-' || c == b'_' - || c == b':' - || c == b'+' - || c == b'/' } #[inline(always)]