Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ pub use vdom::{VDom, VDomGuard};
/// assert_eq!(dom.query_selector("div").unwrap().count(), 1);
/// ```
pub fn parse(input: &str, options: ParserOptions) -> Result<VDom<'_>, ParseError> {
    // The byte-level entry point does the actual work; a `&str` is simply viewed as its bytes.
    let bytes = input.as_bytes();
    parse_bytes(bytes, options)
}

/// Same as `parse()`, but with `&[u8]` input.
pub fn parse_bytes(input: &[u8], options: ParserOptions) -> Result<VDom<'_>, ParseError> {
let mut parser = Parser::new(input, options);
parser.parse()?;
Ok(VDom::from(parser))
Expand Down Expand Up @@ -83,5 +88,10 @@ pub fn parse_query_selector(input: &str) -> Option<Selector<'_>> {
/// Once `VDomGuard` goes out of scope, the string will be freed.
/// It should not be possible to cause UB in its current form and might become a safe function in the future.
pub unsafe fn parse_owned(
    input: String,
    options: ParserOptions,
) -> Result<VDomGuard, ParseError> {
    // `into_bytes` consumes the `String` and hands over its buffer without copying.
    let bytes = input.into_bytes();
    parse_bytes_owned(bytes, options)
}

/// Same as `parse_owned()`, but with `Vec<u8>` input.
///
/// Takes ownership of `input`, parses it and returns a `VDomGuard` wrapping the resulting DOM.
///
/// # Errors
/// Returns the `ParseError` reported by `VDomGuard::parse` if parsing fails.
///
/// # Safety
/// The contract is the same as for `parse_owned()`, which forwards directly to this function:
/// the returned `VDomGuard` keeps the input buffer alive and frees it once the guard goes out
/// of scope.
pub unsafe fn parse_bytes_owned(
    input: Vec<u8>,
    options: ParserOptions,
) -> Result<VDomGuard, ParseError> {
    VDomGuard::parse(input, options)
}
4 changes: 2 additions & 2 deletions src/parser/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ pub struct Parser<'a> {
}

impl<'a> Parser<'a> {
pub(crate) fn new(input: &str, options: ParserOptions) -> Parser {
pub(crate) fn new(input: &[u8], options: ParserOptions) -> Parser {
Parser {
stack: Vec::with_capacity(4),
options,
tags: Vec::new(),
stream: Stream::new(input.as_bytes()),
stream: Stream::new(input),
ast: Vec::new(),
ids: HashMap::new(),
classes: HashMap::new(),
Expand Down
17 changes: 16 additions & 1 deletion src/tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{parse, parse_owned, Bytes};
use crate::{parse, parse_bytes, parse_owned, Bytes};
use crate::{parser::*, HTMLTag, Node};

fn force_as_tag<'a, 'b>(actual: &'a Node<'b>) -> &'a HTMLTag<'b> {
Expand Down Expand Up @@ -778,3 +778,18 @@ fn tag_raw_abrupt_stop() {
let from_raw = first_tag.raw().try_as_utf8_str().unwrap();
assert_eq!(from_raw, "<p>abcd</p");
}

/// Parsing raw bytes that are not valid UTF-8 must succeed and keep the bytes untouched.
#[test]
fn non_utf8() {
    // 0xC3 starts a two-byte UTF-8 sequence, but 0x28 is not a continuation byte,
    // so this document cannot be represented as a `&str`.
    let raw: &[u8] = b"<p>\xc3\x28</p>";

    let dom = parse_bytes(raw, Default::default()).unwrap();

    // Resolve the first top-level node and require it to be a tag.
    let handle = &dom.children()[0];
    let node = handle.get(dom.parser()).unwrap();
    let tag = node.as_tag().unwrap();

    assert_eq!(tag.raw().as_bytes(), b"<p>\xc3\x28</p>");
}
28 changes: 14 additions & 14 deletions src/vdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,33 +188,33 @@ pub struct VDomGuard {
/// Wrapped VDom instance
dom: VDom<'static>,
/// The leaked input string that is referenced by self.dom
_s: RawString,
_b: RawBytes,
/// PhantomData for self.dom
_phantom: PhantomData<&'static str>,
_phantom: PhantomData<&'static [u8]>,
}

// `VDomGuard` stores a raw-pointer wrapper for the input buffer, and raw pointers are neither
// `Send` nor `Sync`, so the auto traits have to be asserted by hand.
// NOTE(review): presumably sound because the guard is the sole owner of the buffer and only
// frees it on drop — confirm that nothing mutates the buffer through the raw pointer.
unsafe impl Send for VDomGuard {}
unsafe impl Sync for VDomGuard {}

impl VDomGuard {
/// Parses the input string
pub(crate) fn parse(input: String, options: ParserOptions) -> Result<VDomGuard, ParseError> {
let input = RawString::new(input);
pub(crate) fn parse(input: Vec<u8>, options: ParserOptions) -> Result<VDomGuard, ParseError> {
let input = RawBytes::new(input);

let ptr = input.as_ptr();

let input_ref: &'static str = unsafe { &*ptr };
let input_ref: &'static [u8] = unsafe { &*ptr };

// Parsing will either:
// a) succeed, and we return a VDom instance
// that, when dropped, will free the input string
// b) fail, and we return a ParseError
// and `RawString`s destructor will run and deallocate the string properly
// and `RawBytes`'s destructor will run and deallocate the buffer properly
let mut parser = Parser::new(input_ref, options);
parser.parse()?;

Ok(Self {
_s: input,
_b: input,
dom: VDom::from(parser),
_phantom: PhantomData,
})
Expand All @@ -238,21 +238,21 @@ impl VDomGuard {
}

#[derive(Debug)]
struct RawString(*mut str);
struct RawBytes(*mut [u8]);

impl RawString {
pub fn new(s: String) -> Self {
Self(Box::into_raw(s.into_boxed_str()))
impl RawBytes {
pub fn new(s: Vec<u8>) -> Self {
Self(Box::into_raw(s.into_boxed_slice()))
}

pub fn as_ptr(&self) -> *mut str {
pub fn as_ptr(&self) -> *mut [u8] {
self.0
}
}

impl Drop for RawString {
impl Drop for RawBytes {
fn drop(&mut self) {
// SAFETY: the pointer is always valid because `RawString` can only be constructed through `RawString::new()`
// SAFETY: the pointer is always valid because `RawBytes` can only be constructed through `RawBytes::new()`
unsafe {
drop(Box::from_raw(self.0));
};
Expand Down