diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs
index 6bfb8d40ca..fae25d6161 100644
--- a/lychee-lib/src/extract/html/html5ever.rs
+++ b/lychee-lib/src/extract/html/html5ever.rs
@@ -583,4 +583,40 @@ mod tests {
         let uris = extract_html(input, false);
         assert!(uris.is_empty());
     }
+
+    #[test]
+    fn test_extract_links_after_empty_verbatim_block() {
+        // Test that links are correctly extracted after empty <pre><code> blocks
+        let input = r#"
+        <body>
+            <div>
+                See <a href="https://example.com/1">First</a>
+            </div>
+            <pre>
+                <code></code>
+            </pre>
+            <div>
+                See <a href="https://example.com/2">Second</a>
+            </div>
+        </body>
+        "#;
+
+        let expected = vec![
+            RawUri {
+                text: "https://example.com/1".to_string(),
+                element: Some("a".to_string()),
+                attribute: Some("href".to_string()),
+                span: span_line(4), // 4th line of `input` (the raw string opens with a newline)
+            },
+            RawUri {
+                text: "https://example.com/2".to_string(),
+                element: Some("a".to_string()),
+                attribute: Some("href".to_string()),
+                span: span_line(10), // 10th line of `input`, after the empty <pre><code> block
+            },
+        ];
+
+        let uris = extract_html(input, false); // NOTE(review): `false` presumably excludes verbatim content; confirm
+        assert_eq!(uris, expected);
+    }
}
diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs
index ad6f4ee4d3..c7b58c135f 100644
--- a/lychee-lib/src/extract/html/html5gum.rs
+++ b/lychee-lib/src/extract/html/html5gum.rs
@@ -278,14 +278,15 @@ impl Callback<(), usize> for &mut LinkExtractor {
self.verbatim_stack.pop();
}
}
- CallbackEvent::EndTag { .. } => {
+ CallbackEvent::EndTag { name } => {
+ let tag_name = String::from_utf8_lossy(name);
// Update the current verbatim element name.
//
// Keeps track of the last verbatim element name, so that we can
// properly handle nested verbatim blocks.
- if self.filter_verbatim_here()
+ if !self.include_verbatim
&& let Some(last_verbatim) = self.verbatim_stack.last()
- && last_verbatim == &self.current_element
+ && last_verbatim == tag_name.as_ref()
{
self.verbatim_stack.pop();
}
@@ -726,4 +727,40 @@ mod tests {
let actual = extract_html_fragments(input);
assert_eq!(actual, expected);
}
+
+    #[test]
+    fn test_extract_links_after_empty_verbatim_block() {
+        // Test that links are correctly extracted after empty <pre><code> blocks
+        let input = r#"
+        <body>
+            <div>
+                See <a href="https://example.com/1">First</a>
+            </div>
+            <pre>
+                <code></code>
+            </pre>
+            <div>
+                See <a href="https://example.com/2">Second</a>
+            </div>
+        </body>
+        "#;
+
+        let expected = vec![
+            RawUri {
+                text: "https://example.com/1".to_string(),
+                element: Some("a".to_string()),
+                attribute: Some("href".to_string()),
+                span: span(4, 30), // line 4 of `input`; the URL starts at column 30
+            },
+            RawUri {
+                text: "https://example.com/2".to_string(),
+                element: Some("a".to_string()),
+                attribute: Some("href".to_string()),
+                span: span(10, 30), // line 10 of `input`, after the empty <pre><code> block
+            },
+        ];
+
+        let uris = extract_html(input, false); // NOTE(review): `false` presumably excludes verbatim content; confirm
+        assert_eq!(uris, expected);
+    }
}