From fcd8377dda587a9108596517421c7c96b755ff61 Mon Sep 17 00:00:00 2001 From: tooomm Date: Mon, 16 Jun 2025 20:46:31 +0200 Subject: [PATCH 1/2] Add xml schema found in xsd files See e.g. https://www.w3schools.com/xml/schema_intro.asp --- lychee-lib/src/filter/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 0b0423527d..d3fbb6bdf0 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -46,6 +46,7 @@ const FALSE_POSITIVE_PAT: &[&str] = &[ r"^https?://www.w3.org/1999/xhtml", r"^https?://www.w3.org/1999/xlink", r"^https?://www.w3.org/2000/svg", + r"^https?://www.w3.org/2001/XMLSchema-instance", r"^https?://ogp.me/ns#", r"^https?://schemas.microsoft.com", r"^https?://(.*)/xmlrpc.php$", From 93090c30b9cfa76257702fd182f8e8943eac26ac Mon Sep 17 00:00:00 2001 From: tooomm Date: Thu, 19 Jun 2025 18:01:48 +0200 Subject: [PATCH 2/2] escape dots in urls --- lychee-lib/src/filter/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index d3fbb6bdf0..e30395b15e 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -41,15 +41,15 @@ static UNSUPPORTED_DOMAINS: LazyLock> = LazyLock::new(|| { /// Pre-defined exclusions for known false-positives const FALSE_POSITIVE_PAT: &[&str] = &[ - r"^https?://schemas.openxmlformats.org", - r"^https?://schemas.zune.net", - r"^https?://www.w3.org/1999/xhtml", - r"^https?://www.w3.org/1999/xlink", - r"^https?://www.w3.org/2000/svg", - r"^https?://www.w3.org/2001/XMLSchema-instance", - r"^https?://ogp.me/ns#", - r"^https?://schemas.microsoft.com", - r"^https?://(.*)/xmlrpc.php$", + r"^https?://schemas\.openxmlformats\.org", + r"^https?://schemas\.microsoft\.com", + r"^https?://schemas\.zune\.net", + r"^https?://www\.w3\.org/1999/xhtml", + r"^https?://www\.w3\.org/1999/xlink", + r"^https?://www\.w3\.org/2000/svg", + 
r"^https?://www\.w3\.org/2001/XMLSchema-instance", + r"^https?://ogp\.me/ns#", + r"^https?://(.*)/xmlrpc\.php$", ]; static FALSE_POSITIVE_SET: LazyLock<RegexSet> =