Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions src/parser/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ impl<'a> Parser<'a> {
self.stream.slice(start, start + end)
}

fn read_to4(&mut self, needle: [u8; 4]) -> &'a [u8] {
fn read_to3(&mut self, needle: [u8; 3]) -> &'a [u8] {
let start = self.stream.idx;
let bytes = &self.stream.data()[start..];

let end = simd::find4(bytes, needle).unwrap_or_else(|| self.stream.len() - start);
let end = simd::find3(bytes, needle).unwrap_or_else(|| self.stream.len() - start);

self.stream.idx += end;
self.stream.slice(start, start + end)
Expand All @@ -118,8 +118,7 @@ impl<'a> Parser<'a> {

// If we do not find any characters that are not identifiers
// then we are probably at the end of the stream
let end = simd::search_non_ident(bytes)
.unwrap_or_else(|| self.stream.len() - start);
let end = simd::search_non_ident(bytes).unwrap_or_else(|| self.stream.len() - start);

self.stream.idx += end;
Some(self.stream.slice(start, start + end))
Expand Down Expand Up @@ -163,7 +162,7 @@ impl<'a> Parser<'a> {
let value = if let Some(quote) = self.stream.expect_oneof_and_skip(&[b'"', b'\'']) {
self.read_to(quote)
} else {
self.read_to4([b' ', b'\n', b'/', b'>'])
self.read_to3([b' ', b'\n', b'>'])
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per MDN, if the `/` isn't separated from an unquoted attribute value by whitespace, it should be treated as part of the value. So I think it's correct to drop `/` entirely from the set of bytes that terminate an unquoted value here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

};

Some((name, Some(value)))
Expand Down Expand Up @@ -219,10 +218,12 @@ impl<'a> Parser<'a> {
self.stream.advance();

let closing_tag_name = self.read_to(b'>');

self.stream.expect_and_skip_cond(b'>');

let closing_tag_matches_parent = self.stack.last()
let closing_tag_matches_parent = self
.stack
.last()
.and_then(|last_handle| last_handle.get(self))
.and_then(|last_item| last_item.as_tag())
.map_or(false, |last_tag| last_tag.name() == closing_tag_name);
Expand Down
6 changes: 3 additions & 3 deletions src/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ pub fn search_non_ident(haystack: &[u8]) -> Option<usize> {
)
}

/// Searches for the first occurence in `haystack`
/// Searches for the first occurrence in `haystack`
#[inline]
pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option<usize> {
pub fn find3(haystack: &[u8], needle: [u8; 3]) -> Option<usize> {
decide!(
nightly::find4(haystack, needle),
nightly::find3(haystack, needle),
stable::find_multi(haystack, needle)
)
}
Expand Down
10 changes: 4 additions & 6 deletions src/simd/nightly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ pub fn find(haystack: &[u8], needle: u8) -> Option<usize> {
fallback::find(&haystack[i..], needle).map(|x| i + x)
}

/// Optimized function for finding one of 4 bytes in `haystack`
pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option<usize> {
/// Optimized function for finding one of 3 bytes in `haystack`
pub fn find3(haystack: &[u8], needle: [u8; 3]) -> Option<usize> {
#[inline(never)]
#[cold]
fn unlikely_find(haystack: &[u8], needle: [u8; 4]) -> Option<usize> {
fn unlikely_find(haystack: &[u8], needle: [u8; 3]) -> Option<usize> {
fallback::find_multi(haystack, needle)
}

Expand All @@ -54,7 +54,6 @@ pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option<usize> {
let needle16a = u8x16::splat(needle[0]);
let needle16b = u8x16::splat(needle[1]);
let needle16c = u8x16::splat(needle[2]);
let needle16d = u8x16::splat(needle[3]);

while i <= len - 16 {
let mut bytes = [0; 16];
Expand All @@ -65,8 +64,7 @@ pub fn find4(haystack: &[u8], needle: [u8; 4]) -> Option<usize> {
let eq1 = bytes.simd_eq(needle16a);
let eq2 = bytes.simd_eq(needle16b);
let eq3 = bytes.simd_eq(needle16c);
let eq4 = bytes.simd_eq(needle16d);
let or = (eq1 | eq2 | eq3 | eq4).to_int();
let or = (eq1 | eq2 | eq3).to_int();
let num = unsafe { std::mem::transmute::<i8x16, u128>(or) };
if num != 0 {
return Some(i + (num.trailing_zeros() >> 3) as usize);
Expand Down
90 changes: 69 additions & 21 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,27 +320,25 @@ mod simd {
}

#[test]
fn string_search_4() {
const NEEDLE: [u8; 4] = [b'a', b'b', b'c', b'd'];

assert_eq!(crate::simd::find4(b"e", NEEDLE), None);
assert_eq!(crate::simd::find4(b"a", NEEDLE), Some(0));
assert_eq!(crate::simd::find4(b"ea", NEEDLE), Some(1));
assert_eq!(crate::simd::find4(b"ef", NEEDLE), None);
assert_eq!(crate::simd::find4(b"ef a", NEEDLE), Some(3));
assert_eq!(crate::simd::find4(b"ef g", NEEDLE), None);
assert_eq!(crate::simd::find4(b"ef ghijk", NEEDLE), None);
assert_eq!(crate::simd::find4(b"ef ghijkl", NEEDLE), None);
assert_eq!(crate::simd::find4(b"ef ghijkla", NEEDLE), Some(9));
assert_eq!(crate::simd::find4(b"ef ghiajklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find4(b"ef ghibjklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find4(b"ef ghicjklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find4(b"ef ghidjklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstua", NEEDLE), Some(18));
assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstub", NEEDLE), Some(18));
assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstuc", NEEDLE), Some(18));
assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstud", NEEDLE), Some(18));
assert_eq!(crate::simd::find4(b"ef ghijklmnopqrstu", NEEDLE), None);
fn string_search_3() {
const NEEDLE: [u8; 3] = [b'a', b'b', b'c'];

assert_eq!(crate::simd::find3(b"e", NEEDLE), None);
assert_eq!(crate::simd::find3(b"a", NEEDLE), Some(0));
assert_eq!(crate::simd::find3(b"ea", NEEDLE), Some(1));
assert_eq!(crate::simd::find3(b"ef", NEEDLE), None);
assert_eq!(crate::simd::find3(b"ef a", NEEDLE), Some(3));
assert_eq!(crate::simd::find3(b"ef g", NEEDLE), None);
assert_eq!(crate::simd::find3(b"ef ghijk", NEEDLE), None);
assert_eq!(crate::simd::find3(b"ef ghijkl", NEEDLE), None);
assert_eq!(crate::simd::find3(b"ef ghijkla", NEEDLE), Some(9));
assert_eq!(crate::simd::find3(b"ef ghiajklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find3(b"ef ghibjklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find3(b"ef ghicjklm", NEEDLE), Some(6));
assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstua", NEEDLE), Some(18));
assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstub", NEEDLE), Some(18));
assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstuc", NEEDLE), Some(18));
assert_eq!(crate::simd::find3(b"ef ghijklmnopqrstu", NEEDLE), None);
}

#[test]
Expand Down Expand Up @@ -510,6 +508,56 @@ fn unquoted() {
);
}

/// Regression test for https://github.com/y21/tl/issues/12: an unquoted
/// attribute value that contains `/` characters (here a URL) is read in full.
#[test]
fn unquoted_href() {
    // https://github.com/y21/tl/issues/12
    let input = "\n<a id=u54423 href=https://www.google.com>Hello World</a>\n";

    let dom = parse(input, ParserOptions::default()).unwrap();
    let parser = dom.parser();

    // Resolve the anchor by id, then extract its `href` attribute as an owned string.
    let href = dom
        .get_element_by_id("u54423")
        .and_then(|handle| handle.get(parser))
        .map(|node| {
            let tag = node.as_tag().unwrap();
            tag.attributes()
                .get("href")
                .flatten()
                .unwrap()
                .try_as_utf8_str()
                .unwrap()
                .to_string()
        });

    assert_eq!(href, Some("https://www.google.com".into()));
}

/// Regression test for https://github.com/y21/tl/issues/12: checks how a `/`
/// following an unquoted attribute value is handled in a self-closing tag.
#[test]
fn unquoted_self_closing() {
    // https://github.com/y21/tl/issues/12
    // Each entry pairs the markup to parse with the id that must be found afterwards.
    let cases: [(&'static str, &'static str); 2] = [
        // Whitespace before the slash: the id value ends before it.
        ("\n<a id=u54423 />\n", "u54423"),
        // According to MDN, if there's no space between an unquoted attribute and the
        // closing tag, the slash is treated as part of the attribute value.
        ("\n<a id=u54423/>\n", "u54423/"),
    ];

    for &(markup, id) in cases.iter() {
        let dom = parse(markup, ParserOptions::default()).unwrap();
        let element = dom.get_element_by_id(id);

        assert!(element.is_some());
    }
}

mod query_selector {
use super::*;
#[test]
Expand Down