diff --git a/Cargo.fuzz.toml b/Cargo.fuzz.toml
new file mode 100644
index 0000000..58c5606
--- /dev/null
+++ b/Cargo.fuzz.toml
@@ -0,0 +1,21 @@
+[package]
+name = "capsule-fuzz"
+version = "0.0.0"
+authors = ["Automatically generated"]
+publish = false
+edition = "2024"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+capsule = { path = ".." }
+bytes = "1.5"
+chrono = "0.4"
+reqwest = "0.12"
+url = "2.5"
+
+[[bin]]
+name = "extractor"
+path = "fuzz_targets/extractor.rs"
diff --git a/Cargo.lock b/Cargo.lock
index 1248621..146efbe 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -17,6 +17,18 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -47,6 +59,19 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+[[package]]
+name = "ammonia"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170"
+dependencies = [
+ "html5ever 0.26.0",
+ "maplit",
+ "once_cell",
+ "tendril",
+ "url",
+]
+
[[package]]
name = "android-tzdata"
version = "0.1.1"
@@ -135,7 +160,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -169,10 +194,10 @@ dependencies = [
"axum-core 0.4.5",
"bytes",
"futures-util",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"http-body-util",
- "itoa",
+ "itoa 1.0.15",
"matchit 0.7.3",
"memchr",
"mime",
@@ -180,7 +205,7 @@ dependencies = [
"pin-project-lite",
"rustversion",
"serde",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tower",
"tower-layer",
"tower-service",
@@ -196,12 +221,12 @@ dependencies = [
"bytes",
"form_urlencoded",
"futures-util",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"http-body-util",
- "hyper",
+ "hyper 1.7.0",
"hyper-util",
- "itoa",
+ "itoa 1.0.15",
"matchit 0.8.4",
"memchr",
"mime",
@@ -212,7 +237,7 @@ dependencies = [
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tokio",
"tower",
"tower-layer",
@@ -229,13 +254,13 @@ dependencies = [
"async-trait",
"bytes",
"futures-util",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tower-layer",
"tower-service",
]
@@ -248,13 +273,13 @@ checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
dependencies = [
"bytes",
"futures-core",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tower-layer",
"tower-service",
"tracing",
@@ -275,6 +300,12 @@ dependencies = [
"windows-targets 0.52.6",
]
+[[package]]
+name = "base64"
+version = "0.21.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
+
[[package]]
name = "base64"
version = "0.22.1"
@@ -287,6 +318,27 @@ version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
+[[package]]
+name = "bit-set"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
[[package]]
name = "bitflags"
version = "2.9.2"
@@ -345,7 +397,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -391,6 +443,7 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
name = "capsule"
version = "0.1.0"
dependencies = [
+ "ammonia",
"anyhow",
"argon2",
"async-trait",
@@ -401,14 +454,19 @@ dependencies = [
"dashmap",
"encoding_rs",
"flate2",
- "hyper",
+ "hyper 1.7.0",
"jsonwebtoken",
+ "kuchiki",
+ "linkify",
"md5",
"mockall",
"once_cell",
- "rand",
+ "percent-encoding",
+ "proptest",
+ "rand 0.8.5",
+ "readability",
"regex",
- "reqwest",
+ "reqwest 0.12.23",
"scraper",
"serde",
"serde_json",
@@ -426,6 +484,7 @@ dependencies = [
"utoipa-axum",
"utoipa-swagger-ui",
"uuid",
+ "whatlang",
"wiremock",
]
@@ -513,6 +572,12 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+[[package]]
+name = "convert_case"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
+
[[package]]
name = "core-foundation"
version = "0.9.4"
@@ -621,6 +686,23 @@ dependencies = [
"typenum",
]
+[[package]]
+name = "cssparser"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
+dependencies = [
+ "cssparser-macros",
+ "dtoa-short",
+ "itoa 0.4.8",
+ "matches",
+ "phf 0.8.0",
+ "proc-macro2",
+ "quote",
+ "smallvec",
+ "syn 1.0.109",
+]
+
[[package]]
name = "cssparser"
version = "0.35.0"
@@ -629,8 +711,8 @@ checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
dependencies = [
"cssparser-macros",
"dtoa-short",
- "itoa",
- "phf",
+ "itoa 1.0.15",
+ "phf 0.11.3",
"smallvec",
]
@@ -641,7 +723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -665,7 +747,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -676,7 +758,7 @@ checksum = "ce154b9bea7fb0c8e8326e62d00354000c36e79770ff21b8c84e3aa267d9d531"
dependencies = [
"darling_core",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -739,7 +821,20 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "derive_more"
+version = "0.99.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
+dependencies = [
+ "convert_case",
+ "proc-macro2",
+ "quote",
+ "rustc_version",
+ "syn 2.0.106",
]
[[package]]
@@ -759,7 +854,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -782,7 +877,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -1043,7 +1138,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -1104,6 +1199,17 @@ dependencies = [
"unicode-width",
]
+[[package]]
+name = "getrandom"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.9.0+wasi-snapshot-preview1",
+]
+
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -1135,6 +1241,25 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
+[[package]]
+name = "h2"
+version = "0.3.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
+dependencies = [
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "http 0.2.12",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
[[package]]
name = "h2"
version = "0.4.12"
@@ -1146,7 +1271,7 @@ dependencies = [
"fnv",
"futures-core",
"futures-sink",
- "http",
+ "http 1.3.1",
"indexmap",
"slab",
"tokio",
@@ -1159,6 +1284,10 @@ name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
[[package]]
name = "hashbrown"
@@ -1225,6 +1354,34 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "html5ever"
+version = "0.25.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5c13fb08e5d4dfc151ee5e88bae63f7773d61852f3bdc73c9f4b9e1bde03148"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.10.1",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "html5ever"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.11.0",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
[[package]]
name = "html5ever"
version = "0.35.0"
@@ -1232,7 +1389,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
dependencies = [
"log",
- "markup5ever",
+ "markup5ever 0.35.0",
"match_token",
]
@@ -1242,6 +1399,17 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
+[[package]]
+name = "http"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa 1.0.15",
+]
+
[[package]]
name = "http"
version = "1.3.1"
@@ -1250,7 +1418,18 @@ checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
dependencies = [
"bytes",
"fnv",
- "itoa",
+ "itoa 1.0.15",
+]
+
+[[package]]
+name = "http-body"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
+dependencies = [
+ "bytes",
+ "http 0.2.12",
+ "pin-project-lite",
]
[[package]]
@@ -1260,7 +1439,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
- "http",
+ "http 1.3.1",
]
[[package]]
@@ -1271,8 +1450,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
"futures-core",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"pin-project-lite",
]
@@ -1288,6 +1467,30 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+[[package]]
+name = "hyper"
+version = "0.14.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "h2 0.3.27",
+ "http 0.2.12",
+ "http-body 0.4.6",
+ "httparse",
+ "httpdate",
+ "itoa 1.0.15",
+ "pin-project-lite",
+ "socket2 0.5.10",
+ "tokio",
+ "tower-service",
+ "tracing",
+ "want",
+]
+
[[package]]
name = "hyper"
version = "1.7.0"
@@ -1298,12 +1501,12 @@ dependencies = [
"bytes",
"futures-channel",
"futures-core",
- "h2",
- "http",
- "http-body",
+ "h2 0.4.12",
+ "http 1.3.1",
+ "http-body 1.0.1",
"httparse",
"httpdate",
- "itoa",
+ "itoa 1.0.15",
"pin-project-lite",
"pin-utils",
"smallvec",
@@ -1317,8 +1520,8 @@ version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
- "http",
- "hyper",
+ "http 1.3.1",
+ "hyper 1.7.0",
"hyper-util",
"rustls",
"rustls-pki-types",
@@ -1327,6 +1530,19 @@ dependencies = [
"tower-service",
]
+[[package]]
+name = "hyper-tls"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
+dependencies = [
+ "bytes",
+ "hyper 0.14.32",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+]
+
[[package]]
name = "hyper-tls"
version = "0.6.0"
@@ -1335,7 +1551,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
- "hyper",
+ "hyper 1.7.0",
"hyper-util",
"native-tls",
"tokio",
@@ -1349,20 +1565,20 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
dependencies = [
- "base64",
+ "base64 0.22.1",
"bytes",
"futures-channel",
"futures-core",
"futures-util",
- "http",
- "http-body",
- "hyper",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "hyper 1.7.0",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
- "socket2",
- "system-configuration",
+ "socket2 0.6.0",
+ "system-configuration 0.6.1",
"tokio",
"tower-service",
"tracing",
@@ -1532,7 +1748,7 @@ version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"cfg-if",
"libc",
]
@@ -1562,6 +1778,12 @@ dependencies = [
"either",
]
+[[package]]
+name = "itoa"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+
[[package]]
name = "itoa"
version = "1.0.15"
@@ -1594,7 +1816,7 @@ version = "9.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde"
dependencies = [
- "base64",
+ "base64 0.22.1",
"js-sys",
"pem",
"ring",
@@ -1603,6 +1825,18 @@ dependencies = [
"simple_asn1",
]
+[[package]]
+name = "kuchiki"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ea8e9c6e031377cff82ee3001dc8026cdf431ed4e2e6b51f98ab8c73484a358"
+dependencies = [
+ "cssparser 0.27.2",
+ "html5ever 0.25.2",
+ "matches",
+ "selectors 0.22.0",
+]
+
[[package]]
name = "lazy_static"
version = "1.5.0"
@@ -1636,7 +1870,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"libc",
"redox_syscall",
]
@@ -1660,6 +1894,15 @@ dependencies = [
"zlib-rs",
]
+[[package]]
+name = "linkify"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1dfa36d52c581e9ec783a7ce2a5e0143da6237be5811a0b3153fedfdbe9f780"
+dependencies = [
+ "memchr",
+]
+
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
@@ -1715,6 +1958,40 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+[[package]]
+name = "maplit"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
+
+[[package]]
+name = "markup5ever"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
+dependencies = [
+ "log",
+ "phf 0.8.0",
+ "phf_codegen 0.8.0",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]
+
+[[package]]
+name = "markup5ever"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
+dependencies = [
+ "log",
+ "phf 0.10.1",
+ "phf_codegen 0.10.0",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]
+
[[package]]
name = "markup5ever"
version = "0.35.0"
@@ -1726,6 +2003,18 @@ dependencies = [
"web_atoms",
]
+[[package]]
+name = "markup5ever_rcdom"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b"
+dependencies = [
+ "html5ever 0.25.2",
+ "markup5ever 0.10.1",
+ "tendril",
+ "xml5ever",
+]
+
[[package]]
name = "match_token"
version = "0.35.0"
@@ -1734,7 +2023,7 @@ checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -1746,6 +2035,12 @@ dependencies = [
"regex-automata 0.1.10",
]
+[[package]]
+name = "matches"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
+
[[package]]
name = "matchit"
version = "0.7.3"
@@ -1863,7 +2158,7 @@ dependencies = [
"cfg-if",
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -1895,6 +2190,12 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+[[package]]
+name = "nodrop"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
+
[[package]]
name = "nom"
version = "7.1.3"
@@ -1937,7 +2238,7 @@ dependencies = [
"num-integer",
"num-iter",
"num-traits",
- "rand",
+ "rand 0.8.5",
"smallvec",
"zeroize",
]
@@ -2015,7 +2316,7 @@ version = "0.10.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"cfg-if",
"foreign-types",
"libc",
@@ -2032,7 +2333,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -2104,7 +2405,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166"
dependencies = [
"base64ct",
- "rand_core",
+ "rand_core 0.6.4",
"subtle",
]
@@ -2120,7 +2421,7 @@ version = "3.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3"
dependencies = [
- "base64",
+ "base64 0.22.1",
"serde",
]
@@ -2139,14 +2440,54 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+[[package]]
+name = "phf"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
+dependencies = [
+ "phf_macros 0.8.0",
+ "phf_shared 0.8.0",
+ "proc-macro-hack",
+]
+
+[[package]]
+name = "phf"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
+dependencies = [
+ "phf_shared 0.10.0",
+]
+
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
- "phf_macros",
- "phf_shared",
+ "phf_macros 0.11.3",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
+dependencies = [
+ "phf_generator 0.8.0",
+ "phf_shared 0.8.0",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
+dependencies = [
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
]
[[package]]
@@ -2155,8 +2496,28 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
- "phf_generator",
- "phf_shared",
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
+dependencies = [
+ "phf_shared 0.8.0",
+ "rand 0.7.3",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
+dependencies = [
+ "phf_shared 0.10.0",
+ "rand 0.8.5",
]
[[package]]
@@ -2165,8 +2526,22 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
- "phf_shared",
- "rand",
+ "phf_shared 0.11.3",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
+dependencies = [
+ "phf_generator 0.8.0",
+ "phf_shared 0.8.0",
+ "proc-macro-hack",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
]
[[package]]
@@ -2175,11 +2550,29 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
- "phf_generator",
- "phf_shared",
- "proc-macro2",
- "quote",
- "syn",
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
+dependencies = [
+ "siphasher 0.3.11",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+dependencies = [
+ "siphasher 0.3.11",
]
[[package]]
@@ -2188,7 +2581,7 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
- "siphasher",
+ "siphasher 1.0.1",
]
[[package]]
@@ -2293,9 +2686,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
- "syn",
+ "syn 2.0.106",
]
+[[package]]
+name = "proc-macro-hack"
+version = "0.5.20+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
+
[[package]]
name = "proc-macro2"
version = "1.0.101"
@@ -2305,6 +2704,32 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "proptest"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
+dependencies = [
+ "bit-set",
+ "bit-vec",
+ "bitflags 2.9.2",
+ "lazy_static",
+ "num-traits",
+ "rand 0.9.2",
+ "rand_chacha 0.9.0",
+ "rand_xorshift",
+ "regex-syntax 0.8.5",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
+]
+
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
[[package]]
name = "quote"
version = "1.0.40"
@@ -2320,6 +2745,20 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "getrandom 0.1.16",
+ "libc",
+ "rand_chacha 0.2.2",
+ "rand_core 0.5.1",
+ "rand_hc",
+ "rand_pcg",
+]
+
[[package]]
name = "rand"
version = "0.8.5"
@@ -2327,8 +2766,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.3",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.5.1",
]
[[package]]
@@ -2338,7 +2797,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.3",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+dependencies = [
+ "getrandom 0.1.16",
]
[[package]]
@@ -2350,6 +2828,15 @@ dependencies = [
"getrandom 0.2.16",
]
+[[package]]
+name = "rand_core"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+dependencies = [
+ "getrandom 0.3.3",
+]
+
[[package]]
name = "rand_distr"
version = "0.4.3"
@@ -2357,7 +2844,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
- "rand",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core 0.5.1",
+]
+
+[[package]]
+name = "rand_pcg"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
+dependencies = [
+ "rand_core 0.5.1",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
+dependencies = [
+ "rand_core 0.9.3",
]
[[package]]
@@ -2380,13 +2894,27 @@ dependencies = [
"crossbeam-utils",
]
+[[package]]
+name = "readability"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7843b159286299dd2b90f06d904ae1a8017a650d88d716c85dd6f123947f399"
+dependencies = [
+ "html5ever 0.25.2",
+ "lazy_static",
+ "markup5ever_rcdom",
+ "regex",
+ "reqwest 0.11.27",
+ "url",
+]
+
[[package]]
name = "redox_syscall"
version = "0.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
]
[[package]]
@@ -2433,6 +2961,46 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+[[package]]
+name = "reqwest"
+version = "0.11.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62"
+dependencies = [
+ "base64 0.21.7",
+ "bytes",
+ "encoding_rs",
+ "futures-core",
+ "futures-util",
+ "h2 0.3.27",
+ "http 0.2.12",
+ "http-body 0.4.6",
+ "hyper 0.14.32",
+ "hyper-tls 0.5.0",
+ "ipnet",
+ "js-sys",
+ "log",
+ "mime",
+ "native-tls",
+ "once_cell",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustls-pemfile",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper 0.1.2",
+ "system-configuration 0.5.1",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "winreg",
+]
+
[[package]]
name = "reqwest"
version = "0.12.23"
@@ -2440,18 +3008,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
dependencies = [
"async-compression",
- "base64",
+ "base64 0.22.1",
"bytes",
"encoding_rs",
"futures-core",
"futures-util",
- "h2",
- "http",
- "http-body",
+ "h2 0.4.12",
+ "http 1.3.1",
+ "http-body 1.0.1",
"http-body-util",
- "hyper",
+ "hyper 1.7.0",
"hyper-rustls",
- "hyper-tls",
+ "hyper-tls 0.6.0",
"hyper-util",
"js-sys",
"log",
@@ -2463,7 +3031,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tokio",
"tokio-native-tls",
"tokio-util",
@@ -2503,7 +3071,7 @@ dependencies = [
"num-traits",
"pkcs1",
"pkcs8",
- "rand_core",
+ "rand_core 0.6.4",
"signature",
"spki",
"subtle",
@@ -2530,7 +3098,7 @@ dependencies = [
"proc-macro2",
"quote",
"rust-embed-utils",
- "syn",
+ "syn 2.0.106",
"walkdir",
]
@@ -2566,13 +3134,22 @@ version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"errno",
"libc",
"linux-raw-sys 0.4.15",
@@ -2585,7 +3162,7 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"errno",
"libc",
"linux-raw-sys 0.9.4",
@@ -2606,6 +3183,15 @@ dependencies = [
"zeroize",
]
+[[package]]
+name = "rustls-pemfile"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
+dependencies = [
+ "base64 0.21.7",
+]
+
[[package]]
name = "rustls-pki-types"
version = "1.12.0"
@@ -2632,6 +3218,18 @@ version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+[[package]]
+name = "rusty-fork"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
+dependencies = [
+ "fnv",
+ "quick-error",
+ "tempfile",
+ "wait-timeout",
+]
+
[[package]]
name = "ryu"
version = "1.0.20"
@@ -2668,12 +3266,12 @@ version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9"
dependencies = [
- "cssparser",
+ "cssparser 0.35.0",
"ego-tree",
"getopts",
- "html5ever",
+ "html5ever 0.35.0",
"precomputed-hash",
- "selectors",
+ "selectors 0.31.0",
"tendril",
]
@@ -2683,7 +3281,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"core-foundation",
"core-foundation-sys",
"libc",
@@ -2700,25 +3298,51 @@ dependencies = [
"libc",
]
+[[package]]
+name = "selectors"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
+dependencies = [
+ "bitflags 1.3.2",
+ "cssparser 0.27.2",
+ "derive_more 0.99.20",
+ "fxhash",
+ "log",
+ "matches",
+ "phf 0.8.0",
+ "phf_codegen 0.8.0",
+ "precomputed-hash",
+ "servo_arc 0.1.1",
+ "smallvec",
+ "thin-slice",
+]
+
[[package]]
name = "selectors"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6"
dependencies = [
- "bitflags",
- "cssparser",
- "derive_more",
+ "bitflags 2.9.2",
+ "cssparser 0.35.0",
+ "derive_more 2.0.1",
"fxhash",
"log",
"new_debug_unreachable",
- "phf",
- "phf_codegen",
+ "phf 0.11.3",
+ "phf_codegen 0.11.3",
"precomputed-hash",
- "servo_arc",
+ "servo_arc 0.4.1",
"smallvec",
]
+[[package]]
+name = "semver"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+
[[package]]
name = "serde"
version = "1.0.219"
@@ -2736,7 +3360,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -2745,7 +3369,7 @@ version = "1.0.143"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
dependencies = [
- "itoa",
+ "itoa 1.0.15",
"memchr",
"ryu",
"serde",
@@ -2757,7 +3381,7 @@ version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a"
dependencies = [
- "itoa",
+ "itoa 1.0.15",
"serde",
]
@@ -2768,11 +3392,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
- "itoa",
+ "itoa 1.0.15",
"ryu",
"serde",
]
+[[package]]
+name = "servo_arc"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
+dependencies = [
+ "nodrop",
+ "stable_deref_trait",
+]
+
[[package]]
name = "servo_arc"
version = "0.4.1"
@@ -2835,7 +3469,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
- "rand_core",
+ "rand_core 0.6.4",
]
[[package]]
@@ -2853,9 +3487,15 @@ dependencies = [
"num-bigint",
"num-traits",
"thiserror",
- "time",
+ "time 0.3.41",
]
+[[package]]
+name = "siphasher"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
+
[[package]]
name = "siphasher"
version = "1.0.1"
@@ -2886,6 +3526,16 @@ dependencies = [
"serde",
]
+[[package]]
+name = "socket2"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
[[package]]
name = "socket2"
version = "0.6.0"
@@ -2934,7 +3584,7 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
dependencies = [
- "base64",
+ "base64 0.22.1",
"bytes",
"chrono",
"crc",
@@ -2976,7 +3626,7 @@ dependencies = [
"quote",
"sqlx-core",
"sqlx-macros-core",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -2999,7 +3649,7 @@ dependencies = [
"sqlx-mysql",
"sqlx-postgres",
"sqlx-sqlite",
- "syn",
+ "syn 2.0.106",
"tokio",
"url",
]
@@ -3011,8 +3661,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
dependencies = [
"atoi",
- "base64",
- "bitflags",
+ "base64 0.22.1",
+ "bitflags 2.9.2",
"byteorder",
"bytes",
"chrono",
@@ -3028,13 +3678,13 @@ dependencies = [
"hex",
"hkdf",
"hmac",
- "itoa",
+ "itoa 1.0.15",
"log",
"md-5",
"memchr",
"once_cell",
"percent-encoding",
- "rand",
+ "rand 0.8.5",
"rsa",
"serde",
"sha1",
@@ -3055,8 +3705,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
dependencies = [
"atoi",
- "base64",
- "bitflags",
+ "base64 0.22.1",
+ "bitflags 2.9.2",
"byteorder",
"chrono",
"crc",
@@ -3069,12 +3719,12 @@ dependencies = [
"hkdf",
"hmac",
"home",
- "itoa",
+ "itoa 1.0.15",
"log",
"md-5",
"memchr",
"once_cell",
- "rand",
+ "rand 0.8.5",
"serde",
"serde_json",
"sha2",
@@ -3127,7 +3777,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
- "phf_shared",
+ "phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
@@ -3138,8 +3788,8 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
- "phf_generator",
- "phf_shared",
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
"proc-macro2",
"quote",
]
@@ -3167,6 +3817,17 @@ version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
[[package]]
name = "syn"
version = "2.0.106"
@@ -3178,6 +3839,12 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "sync_wrapper"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
+
[[package]]
name = "sync_wrapper"
version = "1.0.2"
@@ -3195,7 +3862,18 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "system-configuration"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
+dependencies = [
+ "bitflags 1.3.2",
+ "core-foundation",
+ "system-configuration-sys 0.5.0",
]
[[package]]
@@ -3204,9 +3882,19 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"core-foundation",
- "system-configuration-sys",
+ "system-configuration-sys 0.6.0",
+]
+
+[[package]]
+name = "system-configuration-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
]
[[package]]
@@ -3227,7 +3915,7 @@ checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43"
dependencies = [
"aho-corasick",
"arc-swap",
- "base64",
+ "base64 0.22.1",
"bitpacking",
"bon",
"byteorder",
@@ -3266,7 +3954,7 @@ dependencies = [
"tantivy-tokenizer-api",
"tempfile",
"thiserror",
- "time",
+ "time 0.3.41",
"uuid",
"winapi",
]
@@ -3306,7 +3994,7 @@ dependencies = [
"byteorder",
"ownedbytes",
"serde",
- "time",
+ "time 0.3.41",
]
[[package]]
@@ -3395,6 +4083,12 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
+[[package]]
+name = "thin-slice"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
+
[[package]]
name = "thiserror"
version = "2.0.16"
@@ -3412,7 +4106,7 @@ checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -3424,6 +4118,17 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "time"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
+dependencies = [
+ "libc",
+ "wasi 0.10.0+wasi-snapshot-preview1",
+ "winapi",
+]
+
[[package]]
name = "time"
version = "0.3.41"
@@ -3431,7 +4136,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
dependencies = [
"deranged",
- "itoa",
+ "itoa 1.0.15",
"num-conv",
"powerfmt",
"serde",
@@ -3495,7 +4200,7 @@ dependencies = [
"pin-project-lite",
"signal-hook-registry",
"slab",
- "socket2",
+ "socket2 0.6.0",
"tokio-macros",
"windows-sys 0.59.0",
]
@@ -3508,7 +4213,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -3565,7 +4270,7 @@ dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
- "sync_wrapper",
+ "sync_wrapper 1.0.2",
"tokio",
"tower-layer",
"tower-service",
@@ -3578,11 +4283,11 @@ version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
"bytes",
"futures-util",
- "http",
- "http-body",
+ "http 1.3.1",
+ "http-body 1.0.1",
"iri-string",
"pin-project-lite",
"tower",
@@ -3624,7 +4329,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -3691,6 +4396,12 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
[[package]]
name = "unicase"
version = "2.8.1"
@@ -3745,6 +4456,7 @@ dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
+ "serde",
]
[[package]]
@@ -3799,7 +4511,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
- "syn",
+ "syn 2.0.106",
"uuid",
]
@@ -3810,7 +4522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d047458f1b5b65237c2f6dc6db136945667f40a7668627b3490b9513a3d43a55"
dependencies = [
"axum 0.8.4",
- "base64",
+ "base64 0.22.1",
"mime_guess",
"regex",
"rust-embed",
@@ -3851,6 +4563,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "wait-timeout"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -3870,6 +4591,18 @@ dependencies = [
"try-lock",
]
+[[package]]
+name = "wasi"
+version = "0.9.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+
+[[package]]
+name = "wasi"
+version = "0.10.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
+
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
@@ -3913,7 +4646,7 @@ dependencies = [
"log",
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"wasm-bindgen-shared",
]
@@ -3948,7 +4681,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -3978,8 +4711,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
dependencies = [
- "phf",
- "phf_codegen",
+ "phf 0.11.3",
+ "phf_codegen 0.11.3",
"string_cache",
"string_cache_codegen",
]
@@ -4002,6 +4735,16 @@ dependencies = [
"rustls-pki-types",
]
+[[package]]
+name = "whatlang"
+version = "0.16.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
+dependencies = [
+ "hashbrown 0.14.5",
+ "once_cell",
+]
+
[[package]]
name = "whoami"
version = "1.6.1"
@@ -4064,7 +4807,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -4075,7 +4818,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -4335,6 +5078,16 @@ version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+[[package]]
+name = "winreg"
+version = "0.50.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
+dependencies = [
+ "cfg-if",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "wiremock"
version = "0.6.5"
@@ -4342,12 +5095,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031"
dependencies = [
"assert-json-diff",
- "base64",
+ "base64 0.22.1",
"deadpool",
"futures",
- "http",
+ "http 1.3.1",
"http-body-util",
- "hyper",
+ "hyper 1.7.0",
"hyper-util",
"log",
"once_cell",
@@ -4364,7 +5117,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
- "bitflags",
+ "bitflags 2.9.2",
]
[[package]]
@@ -4373,6 +5126,18 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
+[[package]]
+name = "xml5ever"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9234163818fd8e2418fcde330655e757900d4236acd8cc70fef345ef91f6d865"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.10.1",
+ "time 0.1.45",
+]
+
[[package]]
name = "yoke"
version = "0.8.0"
@@ -4393,7 +5158,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"synstructure",
]
@@ -4414,7 +5179,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
@@ -4434,7 +5199,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
"synstructure",
]
@@ -4474,7 +5239,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
dependencies = [
"proc-macro2",
"quote",
- "syn",
+ "syn 2.0.106",
]
[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index c467d94..ebb0d1e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,7 +24,7 @@ utoipa-axum = { version = "0.1.0" }
utoipa-swagger-ui = { version = "9.0.2", features = ["axum"] }
reqwest = { version = "0.12.23", features = ["json", "gzip", "brotli", "deflate"] }
scraper = { version = "0.24.0" }
-url = { version = "2.5" }
+url = { version = "2.5", features = ["serde"] }
bytes = { version = "1.5" }
encoding_rs = { version = "0.8" }
chardetng = { version = "0.1" }
@@ -49,6 +49,13 @@ tower = { version = "0.5" }
tower-http = { version = "0.6", features = ["trace", "request-id"] }
tokio-util = { version = "0.7", features = ["rt"] }
rand = { version = "0.8" }
+readability = "0.2"
+kuchiki = "0.8"
+ammonia = "3.3"
+whatlang = "0.16"
+linkify = "0.10"
+percent-encoding = "2.3"
+proptest = { version = "1", optional = true }
[dev-dependencies]
mockall = "0.13"
@@ -56,3 +63,6 @@ tower = "0.5"
hyper = "1.0"
wiremock = "0.6"
flate2 = "1.0"
+
+[features]
+fuzz = ["proptest"]
diff --git a/fuzz/fuzz_targets/extractor.rs b/fuzz/fuzz_targets/extractor.rs
new file mode 100644
index 0000000..089f0c3
--- /dev/null
+++ b/fuzz/fuzz_targets/extractor.rs
@@ -0,0 +1,29 @@
+#![no_main]
+
+use bytes::Bytes;
+use chrono::Utc;
+use libfuzzer_sys::fuzz_target;
+use reqwest::{StatusCode, header::HeaderMap};
+use url::Url;
+
+use capsule::extractor::extract;
+use capsule::fetcher::types::{Charset, PageResponse};
+
+fuzz_target!(|data: &[u8]| {
+    // Invalid UTF-8 sequences are replaced with U+FFFD rather than rejected.
+    let html = String::from_utf8_lossy(data).into_owned();
+
+    // Wrap the fuzz input in a minimal successful response; both body fields carry the same text.
+    let response = PageResponse {
+        url_final: Url::parse("https://example.com").expect("static URL is valid"),
+        status: StatusCode::OK,
+        headers: HeaderMap::new(),
+        body_raw: Bytes::from(html.clone()),
+        body_utf8: html,
+        charset: Charset::Utf8,
+        fetched_at: Utc::now(),
+    };
+    // The extractor should never panic regardless of input; its result is intentionally ignored.
+    // NOTE(review): the fuzz manifest in this patch does not list `futures`; confirm it is declared.
+    let _ = futures::executor::block_on(extract(&response));
+});
diff --git a/src/auth/middleware.rs b/src/auth/middleware.rs
index f77f079..a2e75e8 100644
--- a/src/auth/middleware.rs
+++ b/src/auth/middleware.rs
@@ -96,7 +96,7 @@ pub async fn auth_middleware(req: Request, next: Next) -> Response {
#[cfg(test)]
mod tests {
use super::*;
- use crate::{app_state::AppState, repositories::user::MockUserRepositoryTrait};
+ use crate::{app_state::AppState, config::Config, repositories::user::MockUserRepositoryTrait};
use axum::{
Json, Router,
body::to_bytes,
@@ -135,7 +135,9 @@ mod tests {
}
fn create_jwt_token(user_id: Uuid) -> String {
- let jwt_service = JwtService::new("test-secret-key");
+ // Use the same config loading logic as the middleware
+ let config = Config::from_env().expect("Failed to load config");
+ let jwt_service = JwtService::new(config.jwt_secret());
jwt_service
.generate_token(user_id)
.expect("Failed to generate token")
@@ -146,7 +148,9 @@ mod tests {
use chrono::{Duration, Utc};
use jsonwebtoken::{EncodingKey, Header, encode};
- let encoding_key = EncodingKey::from_secret(b"test-secret-key");
+ // Use the same config loading logic as the middleware
+ let config = Config::from_env().expect("Failed to load config");
+ let encoding_key = EncodingKey::from_secret(config.jwt_secret().as_bytes());
let now = Utc::now();
let expired_time = now - Duration::hours(1);
@@ -238,11 +242,6 @@ mod tests {
#[tokio::test]
async fn test_valid_jwt_token_success() {
- // Set test environment variable to ensure consistent config
- unsafe {
- std::env::set_var("JWT_SECRET", "test-secret-key");
- }
-
let app = create_test_app();
let user_id = Uuid::new_v4();
let token = create_jwt_token(user_id);
@@ -260,11 +259,6 @@ mod tests {
#[tokio::test]
async fn test_extractor_returns_correct_user_id() {
- // Set test environment variable to ensure consistent config
- unsafe {
- std::env::set_var("JWT_SECRET", "test-secret-key");
- }
-
let app = create_test_app();
let user_id = Uuid::new_v4();
let token = create_jwt_token(user_id);
diff --git a/src/extractor/cleaner.rs b/src/extractor/cleaner.rs
new file mode 100644
index 0000000..75608e9
--- /dev/null
+++ b/src/extractor/cleaner.rs
@@ -0,0 +1,97 @@
+use ammonia::Builder;
+use regex::Regex;
+use url::Url;
+
+use crate::extractor::model::{ReadabilityResult, normalize_whitespace};
+
+/// Sanitizes `result.html` with Ammonia's default policy, makes its links absolute against
+/// `base_url`, and normalizes the whitespace of `result.text`; both fields are updated in place.
+pub fn sanitize_and_resolve_links(result: &mut ReadabilityResult, base_url: &Url) {
+    // The text and HTML fields are independent of each other, so tidy the text first.
+    result.text = normalize_whitespace(&result.text);
+    // Sanitize before resolving links so only markup that survives cleaning is rewritten.
+    let sanitizer = Builder::default();
+    let sanitized = sanitizer.clean(&result.html).to_string();
+    result.html = resolve_links(&sanitized, base_url);
+}
+
+/// Rewrites every `href="..."` and `src="..."` attribute value in `html` to an absolute URL
+/// resolved against `base_url`.
+///
+/// Matching is purely textual (no HTML parsing), so the input is expected to be serialized
+/// markup with double-quoted attributes. Values that `Url::join` rejects are left exactly as
+/// they appeared in the input.
+fn resolve_links(html: &str, base_url: &Url) -> String {
+    // The pattern is constant, so compile it once instead of on every call.
+    static LINK_ATTR: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
+        Regex::new(r#"\b(href|src)="([^"]+)""#).expect("link attribute pattern is valid")
+    });
+
+    // Both attributes are handled in one pass: the output of a replacement is never scanned
+    // again, so an absolute URL ending in `src=` cannot be mistaken for a `src` attribute.
+    LINK_ATTR
+        .replace_all(html, |caps: &regex::Captures| {
+            let (attr, value) = (&caps[1], &caps[2]);
+            match base_url.join(value) {
+                Ok(absolute_url) => format!(r#"{attr}="{absolute_url}""#),
+                // Unresolvable value: keep the original attribute text untouched.
+                Err(_) => caps[0].to_string(),
+            }
+        })
+        .into_owned()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_sanitize_removes_dangerous_elements() {
+ let mut result = ReadabilityResult {
+ title: "Test".to_string(),
+ site_name: None,
+ byline: None,
+ text: "Hello world".to_string(),
+ html:
+ r#"
Hello world
"#
+ .to_string(),
+ };
+
+ let base_url = Url::parse("https://example.com").unwrap();
+ sanitize_and_resolve_links(&mut result, &base_url);
+
+ assert!(!result.html.contains("
+
+
+
+
+
+
+
+ Sample Article
+ By John Doe
+ This is the first paragraph of a sample article. It contains meaningful content that should be extracted and indexed for search purposes.
+ This is the second paragraph with more substantial content. It includes a relative link and an image.
+
+ The article continues with more paragraphs containing valuable information for readers.
+
+
+
+
+