diff --git a/src/parser/base.rs b/src/parser/base.rs index 7b8c498..dc26344 100644 --- a/src/parser/base.rs +++ b/src/parser/base.rs @@ -87,11 +87,11 @@ impl<'a> Parser<'a> { self.stream.slice(start, start + end) } - fn read_to4(&mut self, needle: [u8; 4]) -> &'a [u8] { + fn read_to3(&mut self, needle: [u8; 3]) -> &'a [u8] { let start = self.stream.idx; let bytes = &self.stream.data()[start..]; - let end = simd::find4(bytes, needle).unwrap_or_else(|| self.stream.len() - start); + let end = simd::find3(bytes, needle).unwrap_or_else(|| self.stream.len() - start); self.stream.idx += end; self.stream.slice(start, start + end) @@ -118,8 +118,7 @@ impl<'a> Parser<'a> { // If we do not find any characters that are not identifiers // then we are probably at the end of the stream - let end = simd::search_non_ident(bytes) - .unwrap_or_else(|| self.stream.len() - start); + let end = simd::search_non_ident(bytes).unwrap_or_else(|| self.stream.len() - start); self.stream.idx += end; Some(self.stream.slice(start, start + end)) @@ -163,7 +162,7 @@ impl<'a> Parser<'a> { let value = if let Some(quote) = self.stream.expect_oneof_and_skip(&[b'"', b'\'']) { self.read_to(quote) } else { - self.read_to4([b' ', b'\n', b'/', b'>']) + self.read_to3([b' ', b'\n', b'>']) }; Some((name, Some(value))) @@ -219,10 +218,12 @@ impl<'a> Parser<'a> { self.stream.advance(); let closing_tag_name = self.read_to(b'>'); - + self.stream.expect_and_skip_cond(b'>'); - let closing_tag_matches_parent = self.stack.last() + let closing_tag_matches_parent = self + .stack + .last() .and_then(|last_handle| last_handle.get(self)) .and_then(|last_item| last_item.as_tag()) .map_or(false, |last_tag| last_tag.name() == closing_tag_name); diff --git a/src/simd/mod.rs b/src/simd/mod.rs index 3693105..6d5ba65 100644 --- a/src/simd/mod.rs +++ b/src/simd/mod.rs @@ -37,11 +37,11 @@ pub fn search_non_ident(haystack: &[u8]) -> Option { ) } -/// Searches for the first occurence in `haystack` +/// Searches for the first occurrence in `haystack` #[inline] -pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option { +pub fn find3(haystack: &[u8], needle: [u8; 3]) -> Option { decide!( - nightly::find4(haystack, needle), + nightly::find3(haystack, needle), stable::find_multi(haystack, needle) ) } diff --git a/src/simd/nightly.rs b/src/simd/nightly.rs index 772157d..86dd0c7 100644 --- a/src/simd/nightly.rs +++ b/src/simd/nightly.rs @@ -36,11 +36,11 @@ pub fn find(haystack: &[u8], needle: u8) -> Option { fallback::find(&haystack[i..], needle).map(|x| i + x) } -/// Optimized function for finding one of 4 bytes in `haystack` -pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option { +/// Optimized function for finding one of 3 bytes in `haystack` +pub fn find3(haystack: &[u8], needle: [u8; 3]) -> Option { #[inline(never)] #[cold] - fn unlikely_find(haystack: &[u8], needle: [u8; 4]) -> Option { + fn unlikely_find(haystack: &[u8], needle: [u8; 3]) -> Option { fallback::find_multi(haystack, needle) } @@ -54,7 +54,6 @@ pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option { let needle16a = u8x16::splat(needle[0]); let needle16b = u8x16::splat(needle[1]); let needle16c = u8x16::splat(needle[2]); - let needle16d = u8x16::splat(needle[3]); while i <= len - 16 { let mut bytes = [0; 16]; @@ -65,8 +64,7 @@ pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option { let eq1 = bytes.simd_eq(needle16a); let eq2 = bytes.simd_eq(needle16b); let eq3 = bytes.simd_eq(needle16c); - let eq4 = bytes.simd_eq(needle16d); - let or = (eq1 | eq2 | eq3 | eq4).to_int(); + let or = (eq1 | eq2 | eq3).to_int(); let num = unsafe { std::mem::transmute::(or) }; if num != 0 { return Some(i + (num.trailing_zeros() >> 3) as usize); diff --git a/src/tests.rs b/src/tests.rs index 046861a..7b1b9d0 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -320,27 +320,25 @@ mod simd { } #[test] - fn string_search_4() { - const NEEDLE: [u8; 4] = [b'a', b'b', b'c', b'd']; - - assert_eq!(crate::simd::find4(b"e", NEEDLE), None); - assert_eq!(crate::simd::find4(b"a", NEEDLE), Some(0)); - assert_eq!(crate::simd::find4(b"ea", NEEDLE), Some(1)); - assert_eq!(crate::simd::find4(b"ef", NEEDLE), None); - assert_eq!(crate::simd::find4(b"ef a", NEEDLE), Some(3)); - assert_eq!(crate::simd::find4(b"ef g", NEEDLE), None); - assert_eq!(crate::simd::find4(b"ef ghijk", NEEDLE), None); - assert_eq!(crate::simd::find4(b"ef ghijkl", NEEDLE), None); - assert_eq!(crate::simd::find4(b"ef ghijkla", NEEDLE), Some(9)); - assert_eq!(crate::simd::find4(b"ef ghiajklm", NEEDLE), Some(6)); - assert_eq!(crate::simd::find4(b"ef ghibjklm", NEEDLE), Some(6)); - assert_eq!(crate::simd::find4(b"ef ghicjklm", NEEDLE), Some(6)); - assert_eq!(crate::simd::find4(b"ef ghidjklm", NEEDLE), Some(6)); - assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstua", NEEDLE), Some(18)); - assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstub", NEEDLE), Some(18)); - assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstuc", NEEDLE), Some(18)); - assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstud", NEEDLE), Some(18)); - assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstu", NEEDLE), None); + fn string_search_3() { + const NEEDLE: [u8; 3] = [b'a', b'b', b'c']; + + assert_eq!(crate::simd::find3(b"e", NEEDLE), None); + assert_eq!(crate::simd::find3(b"a", NEEDLE), Some(0)); + assert_eq!(crate::simd::find3(b"ea", NEEDLE), Some(1)); + assert_eq!(crate::simd::find3(b"ef", NEEDLE), None); + assert_eq!(crate::simd::find3(b"ef a", NEEDLE), Some(3)); + assert_eq!(crate::simd::find3(b"ef g", NEEDLE), None); + assert_eq!(crate::simd::find3(b"ef ghijk", NEEDLE), None); + assert_eq!(crate::simd::find3(b"ef ghijkl", NEEDLE), None); + assert_eq!(crate::simd::find3(b"ef ghijkla", NEEDLE), Some(9)); + assert_eq!(crate::simd::find3(b"ef ghiajklm", NEEDLE), Some(6)); + assert_eq!(crate::simd::find3(b"ef ghibjklm", NEEDLE), Some(6)); + assert_eq!(crate::simd::find3(b"ef ghicjklm", NEEDLE), Some(6)); + assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstua", NEEDLE), Some(18)); + assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstub", NEEDLE), Some(18)); + assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstuc", NEEDLE), Some(18)); + assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstu", NEEDLE), None); } #[test] @@ -510,6 +508,56 @@ fn unquoted() { ); } +#[test] +fn unquoted_href() { + // https://github.com/y21/tl/issues/12 + let input = r#" + Hello World + "#; + + let dom = parse(input, ParserOptions::default()).unwrap(); + let parser = dom.parser(); + let element = dom.get_element_by_id("u54423"); + + assert_eq!( + element.and_then(|x| x.get(parser).map(|x| x + .as_tag() + .unwrap() + .attributes() + .get("href") + .flatten() + .unwrap() + .try_as_utf8_str() + .unwrap() + .to_string())), + Some("https://www.google.com".into()) + ); +} + +#[test] +fn unquoted_self_closing() { + // https://github.com/y21/tl/issues/12 + let input = r#" + + "#; + + let dom = parse(input, ParserOptions::default()).unwrap(); + let element = dom.get_element_by_id("u54423"); + + assert!(element.is_some()); + + // According to MDN, if there's no space between an unquoted attribute and the closing tag, + // the slash is treated as part of the attribute value. + let input = r#" + + "#; + + let dom = parse(input, ParserOptions::default()).unwrap(); + let element = dom.get_element_by_id("u54423/"); + + assert!(element.is_some()); +} + mod query_selector { use super::*; #[test]