diff --git a/Cargo.lock b/Cargo.lock index 57c716960f..0f8552e94d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1821,9 +1821,9 @@ dependencies = [ [[package]] name = "html5gum" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3918b5f36d61861b757261da986b51be562c7a87ac4e531d4158e67e08bff72" +checksum = "ba6fbe46e93059ce8ee19fbefdb0c7699cc7197fcaac048f2c3593f3e5da845f" dependencies = [ "jetscii", ] diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 61587bc97c..7959ace62e 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -24,7 +24,7 @@ futures = "0.3.31" glob = "0.3.2" headers = "0.4.0" html5ever = "0.31.0" -html5gum = "0.7.0" +html5gum = "0.8.0" http = "1.3.1" hyper = "1.6.0" ignore = "0.4.23" diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs index d7fa67bbf1..316f1df5ad 100644 --- a/lychee-lib/src/extract/html/html5ever.rs +++ b/lychee-lib/src/extract/html/html5ever.rs @@ -9,9 +9,33 @@ use html5ever::{ use super::{ super::plaintext::extract_raw_uri_from_plaintext, is_email_link, is_verbatim_elem, srcset, }; -use crate::types::uri::raw::RawUri; +use crate::types::uri::raw::{RawUri, RawUriSpan, SourceSpanProvider, SpanProvider}; + +/// A [`SpanProvider`] which applies a given line offset. +struct LineOffsetSpanProvider<'a> { + /// The number of lines each span will be offset by. + lines_before: usize, + /// The inner [`SpanProvider`] which will be responsible for computing the spans. + inner: &'a SourceSpanProvider<'a>, +} -#[derive(Clone, Default)] +impl SpanProvider for LineOffsetSpanProvider<'_> { + fn span(&self, offset: usize) -> RawUriSpan { + let mut span = self.inner.span(offset); + // if we stay in the same line the column information is wrong, since we didn't know the + // column beforehand and likely did not start at a linebreak. + // This can be improved in the future by using the computed length of lines. + if span.line.get() == 1 { + span.column = None; + } + span.line = span + .line + .saturating_add(self.lines_before.saturating_sub(1)); + span + } +} + +#[derive(Clone)] struct LinkExtractor { links: RefCell>, include_verbatim: bool, @@ -22,7 +46,8 @@ impl TokenSink for LinkExtractor { type Handle = (); #[allow(clippy::match_same_arms)] - fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<()> { + debug_assert_ne!(line_number, 0); match token { Token::CharacterTokens(raw) => { if self.current_verbatim_element_name.borrow().is_some() { @@ -31,122 +56,16 @@ impl TokenSink for LinkExtractor { if self.include_verbatim { self.links .borrow_mut() - .extend(extract_raw_uri_from_plaintext(&raw)); - } - } - Token::TagToken(tag) => { - let Tag { - kind, - name, - self_closing: _self_closing, - attrs, - } = tag; - // Check if this is a verbatim element, which we want to skip. - if !self.include_verbatim && is_verbatim_elem(&name) { - // Check if we're currently inside a verbatim block - let mut curr_verbatim_elem = self.current_verbatim_element_name.borrow_mut(); - - if curr_verbatim_elem.is_some() { - // Inside a verbatim block. Check if the verbatim - // element name matches with the current element name. - if curr_verbatim_elem.as_ref() == Some(&name.to_string()) { - // If so, we're done with the verbatim block, - // -- but only if this is an end tag. 
- if matches!(kind, TagKind::EndTag) { - *curr_verbatim_elem = None; - } - } - } else if matches!(kind, TagKind::StartTag) { - // We're not inside a verbatim block, but we just - // encountered a verbatim element. Remember the name - // of the element. - *curr_verbatim_elem = Some(name.to_string()); - } - } - if self.current_verbatim_element_name.borrow().is_some() { - // We want to skip the content of this element - // as we're inside a verbatim block. - return TokenSinkResult::Continue; - } - - // Check for rel=nofollow. We only extract the first `rel` attribute. - // This is correct as per https://html.spec.whatwg.org/multipage/syntax.html#attributes-0, which states - // "There must never be two or more attributes on the same start tag whose names are an ASCII case-insensitive match for each other." - if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { - if rel.value.contains("nofollow") { - return TokenSinkResult::Continue; - } - } - - // Check and exclude `rel=preconnect` and `rel=dns-prefetch`. Unlike `prefetch` and `preload`, - // `preconnect` and `dns-prefetch` only perform DNS lookups and do not necessarily link to a resource - if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { - if rel.value.contains("preconnect") || rel.value.contains("dns-prefetch") { - return TokenSinkResult::Continue; - } - } - - // Check and exclude `prefix` attribute. This attribute is used to define a prefix - // for the current element. It is not used to link to a resource. - if let Some(_prefix) = attrs.iter().find(|attr| &attr.name.local == "prefix") { - return TokenSinkResult::Continue; - } - - for attr in &attrs { - let urls = LinkExtractor::extract_urls_from_elem_attr( - &attr.name.local, - &name, - &attr.value, - ); - - let new_urls = match urls { - None => extract_raw_uri_from_plaintext(&attr.value), - Some(urls) => urls - .into_iter() - .filter(|url| { - // Only accept email addresses which - // - occur in `href` attributes - // - start with `mailto:` - // - // Technically, email addresses could - // also occur in plain text, but we don't want to extract those - // because of the high false positive rate. 
- // - // This ignores links like `` - let is_email = is_email_link(url); - let is_mailto = url.starts_with("mailto:"); - let is_phone = url.starts_with("tel:"); - let is_href = attr.name.local.as_ref() == "href"; - - if attrs.iter().any(|attr| { - &attr.name.local == "rel" && attr.value.contains("stylesheet") - }) { - // Skip virtual/framework-specific stylesheet paths that start with /@ or @ - // These are typically resolved by dev servers or build tools rather than being real URLs - // Examples: /@global/style.css, @tailwind/base.css as in - // `` - if url.starts_with("/@") || url.starts_with('@') { - return false; - } - // Skip disabled stylesheets - // Ref: https://developer.mozilla.org/en-US/docs/Web/API/HTMLLinkElement/disabled - if attrs.iter().any(|attr| &attr.name.local == "disabled") { - return false; - } - } - - !is_email || (is_mailto && is_href) || (is_phone && is_href) - }) - .map(|url| RawUri { - text: url.to_string(), - element: Some(name.to_string()), - attribute: Some(attr.name.local.to_string()), - }) - .collect::>(), - }; - self.links.borrow_mut().extend(new_urls); + .extend(extract_raw_uri_from_plaintext( + &raw, + &LineOffsetSpanProvider { + lines_before: line_number.try_into().unwrap(), + inner: &SourceSpanProvider::from_input(&raw), + }, + )); } } + Token::TagToken(tag) => return self.process_tag(tag, line_number), Token::ParseError(_err) => { // Silently ignore parse errors } @@ -168,6 +87,134 @@ impl LinkExtractor { } } + fn process_tag( + &self, + Tag { + kind, + name, + self_closing: _, + attrs, + }: Tag, + line_number: u64, + ) -> TokenSinkResult<()> { + // Check if this is a verbatim element, which we want to skip. + if !self.include_verbatim && is_verbatim_elem(&name) { + // Check if we're currently inside a verbatim block + let mut curr_verbatim_elem = self.current_verbatim_element_name.borrow_mut(); + + if curr_verbatim_elem.is_some() { + // Inside a verbatim block. Check if the verbatim + // element name matches with the current element name. + if curr_verbatim_elem.as_ref() == Some(&name.to_string()) { + // If so, we're done with the verbatim block, + // -- but only if this is an end tag. + if matches!(kind, TagKind::EndTag) { + *curr_verbatim_elem = None; + } + } + } else if matches!(kind, TagKind::StartTag) { + // We're not inside a verbatim block, but we just + // encountered a verbatim element. Remember the name + // of the element. + *curr_verbatim_elem = Some(name.to_string()); + } + } + if self.current_verbatim_element_name.borrow().is_some() { + // We want to skip the content of this element + // as we're inside a verbatim block. + return TokenSinkResult::Continue; + } + + // Check for rel=nofollow. We only extract the first `rel` attribute. + // This is correct as per https://html.spec.whatwg.org/multipage/syntax.html#attributes-0, which states + // "There must never be two or more attributes on the same start tag whose names are an ASCII case-insensitive match for each other." + if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { + if rel.value.contains("nofollow") { + return TokenSinkResult::Continue; + } + } + + // Check and exclude `rel=preconnect` and `rel=dns-prefetch`. 
Unlike `prefetch` and `preload`, + // `preconnect` and `dns-prefetch` only perform DNS lookups and do not necessarily link to a resource + if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { + if rel.value.contains("preconnect") || rel.value.contains("dns-prefetch") { + return TokenSinkResult::Continue; + } + } + + // Check and exclude `prefix` attribute. This attribute is used to define a prefix + // for the current element. It is not used to link to a resource. + if let Some(_prefix) = attrs.iter().find(|attr| &attr.name.local == "prefix") { + return TokenSinkResult::Continue; + } + + for attr in &attrs { + let urls = + LinkExtractor::extract_urls_from_elem_attr(&attr.name.local, &name, &attr.value); + + let new_urls = match urls { + None => extract_raw_uri_from_plaintext( + &attr.value, + &LineOffsetSpanProvider { + lines_before: line_number.try_into().unwrap(), + inner: &SourceSpanProvider::from_input(&attr.value), + }, + ), + Some(urls) => urls + .into_iter() + .filter(|url| { + // Only accept email addresses which + // - occur in `href` attributes + // - start with `mailto:` + // + // Technically, email addresses could + // also occur in plain text, but we don't want to extract those + // because of the high false positive rate. + // + // This ignores links like `` + let is_email = is_email_link(url); + let is_mailto = url.starts_with("mailto:"); + let is_phone = url.starts_with("tel:"); + let is_href = attr.name.local.as_ref() == "href"; + + if attrs.iter().any(|attr| { + &attr.name.local == "rel" && attr.value.contains("stylesheet") + }) { + // Skip virtual/framework-specific stylesheet paths that start with /@ or @ + // These are typically resolved by dev servers or build tools rather than being real URLs + // Examples: /@global/style.css, @tailwind/base.css as in + // `` + if url.starts_with("/@") || url.starts_with('@') { + return false; + } + // Skip disabled stylesheets + // Ref: https://developer.mozilla.org/en-US/docs/Web/API/HTMLLinkElement/disabled + if attrs.iter().any(|attr| &attr.name.local == "disabled") { + return false; + } + } + + !is_email || (is_mailto && is_href) || (is_phone && is_href) + }) + .map(|url| RawUri { + text: url.to_string(), + element: Some(name.to_string()), + attribute: Some(attr.name.local.to_string()), + span: RawUriSpan { + line: usize::try_from(line_number) + .unwrap() + .try_into() + .expect("checked above that `line_number != 0`"), + column: None, + }, + }) + .collect::>(), + }; + self.links.borrow_mut().extend(new_urls); + } + TokenSinkResult::Continue + } + /// Extract all semantically known links from a given HTML attribute. #[allow(clippy::unnested_or_patterns)] pub(crate) fn extract_urls_from_elem_attr<'a>( @@ -242,12 +289,21 @@ mod tests { "#; + /// Small test helper to create a [`RawUriSpan`] from just the line and leave the column unset. 
+ const fn span(line: usize) -> RawUriSpan { + RawUriSpan { + line: std::num::NonZeroUsize::new(line).unwrap(), + column: None, + } + } + #[test] fn test_skip_verbatim() { let expected = vec![RawUri { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4), }]; let uris = extract_html(HTML_INPUT, false); @@ -261,26 +317,31 @@ mod tests { text: "https://example.com".to_string(), element: None, attribute: None, + span: span(4), }, RawUri { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4), }, RawUri { text: "https://foo.com".to_string(), element: None, attribute: None, + span: span(7), }, RawUri { text: "http://bar.com/some/path".to_string(), element: None, attribute: None, + span: span(7), }, RawUri { text: "https://baz.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(9), }, ]; @@ -303,6 +364,7 @@ mod tests { text: "https://example.com/".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(2), }]; let uris = extract_html(HTML_INPUT, false); @@ -320,6 +382,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -337,6 +400,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(5), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -353,6 +417,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -375,6 +440,7 @@ mod tests { text: "mailto:foo@bar.com".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(8), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -397,6 +463,7 @@ mod tests { text: "tel:1234567890".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(8), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -477,6 +544,7 @@ mod tests { text: "https://example.com".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(2), }]; let uris = extract_html(input, false); diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs index c61a9aa321..2bee5345f3 100644 --- a/lychee-lib/src/extract/html/html5gum.rs +++ b/lychee-lib/src/extract/html/html5gum.rs @@ -1,17 +1,14 @@ -use html5gum::{Emitter, Error, State, Tokenizer}; +use html5gum::{ + Spanned, Tokenizer, + emitters::callback::{Callback, CallbackEmitter, CallbackEvent}, +}; use std::collections::{HashMap, HashSet}; use super::{is_email_link, is_verbatim_elem, srcset}; -use crate::{extract::plaintext::extract_raw_uri_from_plaintext, types::uri::raw::RawUri}; - -#[derive(Clone, Default, Debug)] -struct Element { - /// Current element name being processed. - /// This is called a tag in html5gum. - name: String, - /// Whether the current element is a closing tag. - is_closing: bool, -} +use crate::{ + extract::plaintext::extract_raw_uri_from_plaintext, + types::uri::raw::{OffsetSpanProvider, RawUri, SourceSpanProvider, SpanProvider}, +}; /// Extract links from HTML documents. 
/// @@ -25,38 +22,48 @@ struct Element { /// /// The `links` vector contains all links extracted from the HTML document and /// the `fragments` set contains all fragments extracted from the HTML document. -#[derive(Clone, Default, Debug)] -struct LinkExtractor { +#[derive(Clone, Debug)] +struct LinkExtractor { + /// The [`SpanProvider`] which will be used to compute spans for URIs. + /// + /// This is generic, since e.g. the markdown parser has already started, so we have to compute + /// the span location in relation to the offset in the outer document. + span_provider: S, /// Links extracted from the HTML document. links: Vec, /// Fragments extracted from the HTML document. fragments: HashSet, /// Whether to include verbatim elements in the output. include_verbatim: bool, - /// Current element being processed. - current_element: Element, + /// Current element name being processed. + /// This is called a tag in html5gum. + current_element: String, /// Current attributes being processed. /// This is a list of key-value pairs (in order of appearance), where the key is the attribute name /// and the value is the attribute value. - current_attributes: HashMap, + current_attributes: HashMap>, /// Current attribute name being processed. current_attribute_name: String, - /// A bunch of plain characters currently being processed. - current_raw_string: String, /// Element name of the current verbatim block. /// Used to keep track of nested verbatim blocks. verbatim_stack: Vec, } -impl LinkExtractor { +impl LinkExtractor { /// Create a new `LinkExtractor`. /// /// Set `include_verbatim` to `true` if you want to include verbatim /// elements in the output. - fn new(include_verbatim: bool) -> Self { + fn new(span_provider: S, include_verbatim: bool) -> Self { Self { + span_provider, include_verbatim, - ..Default::default() + links: Vec::default(), + fragments: HashSet::default(), + current_element: String::default(), + current_attributes: HashMap::default(), + current_attribute_name: String::default(), + verbatim_stack: Vec::default(), } } @@ -69,17 +76,19 @@ impl LinkExtractor { // Process 'srcset' attribute first if let Some(srcset) = self.current_attributes.get("srcset") { + let span = srcset.span; urls.extend(srcset::parse(srcset).into_iter().map(|url| RawUri { text: url.to_string(), - element: Some(self.current_element.name.clone()), + element: Some(self.current_element.clone()), attribute: Some("srcset".to_string()), + span: self.span_provider.span(span.start), })); } // Process other attributes for (attr_name, attr_value) in &self.current_attributes { #[allow(clippy::unnested_or_patterns)] - match (self.current_element.name.as_str(), attr_name.as_str()) { + match (self.current_element.as_str(), attr_name.as_str()) { // Common element/attribute combinations for links (_, "href" | "src" | "cite" | "usemap") | // Less common (but still valid!) combinations @@ -98,8 +107,9 @@ impl LinkExtractor { ("video", "poster") => { urls.push(RawUri { text: attr_value.to_string(), - element: Some(self.current_element.name.clone()), + element: Some(self.current_element.clone()), attribute: Some(attr_name.to_string()), + span: self.span_provider.span(attr_value.span.start), }); } _ => {} @@ -109,37 +119,9 @@ impl LinkExtractor { urls } - /// Extract links from the current string and add them to the links vector. 
- fn flush_current_characters(&mut self) { - if !self.include_verbatim - && (is_verbatim_elem(&self.current_element.name) || !self.verbatim_stack.is_empty()) - { - self.update_verbatim_element(); - // Early return since we don't want to extract links from verbatim - // blocks according to the configuration. - self.current_raw_string.clear(); - return; - } - - self.links - .extend(extract_raw_uri_from_plaintext(&self.current_raw_string)); - self.current_raw_string.clear(); - } - - /// Update the current verbatim element name. - /// - /// Keeps track of the last verbatim element name, so that we can - /// properly handle nested verbatim blocks. - fn update_verbatim_element(&mut self) { - if self.current_element.is_closing { - if let Some(last_verbatim) = self.verbatim_stack.last() { - if last_verbatim == &self.current_element.name { - self.verbatim_stack.pop(); - } - } - } else if !self.include_verbatim && is_verbatim_elem(&self.current_element.name) { - self.verbatim_stack.push(self.current_element.name.clone()); - } + fn filter_verbatim_here(&self) -> bool { + !self.include_verbatim + && (is_verbatim_elem(&self.current_element) || !self.verbatim_stack.is_empty()) } /// Flush the current element and attribute values to the links vector. @@ -160,11 +142,7 @@ impl LinkExtractor { /// /// The current attribute name and value are cleared after processing. fn flush_links(&mut self) { - self.update_verbatim_element(); - - if !self.include_verbatim - && (!self.verbatim_stack.is_empty() || is_verbatim_elem(&self.current_element.name)) - { + if self.filter_verbatim_here() { self.current_attributes.clear(); return; } @@ -239,119 +217,97 @@ impl LinkExtractor { } } -impl Emitter for &mut LinkExtractor { - type Token = (); - - fn set_last_start_tag(&mut self, last_start_tag: Option<&[u8]>) { - self.current_element.name = - String::from_utf8_lossy(last_start_tag.unwrap_or_default()).into_owned(); - } - - fn emit_eof(&mut self) { - self.flush_current_characters(); - } - - fn emit_error(&mut self, _: Error) {} - - fn should_emit_errors(&mut self) -> bool { - false - } - - fn pop_token(&mut self) -> Option<()> { - None - } - - /// Emit a bunch of plain characters as character tokens. - fn emit_string(&mut self, c: &[u8]) { - self.current_raw_string - .push_str(&String::from_utf8_lossy(c)); - } - - fn init_start_tag(&mut self) { - self.flush_current_characters(); - self.current_element = Element::default(); - } - - fn init_end_tag(&mut self) { - self.flush_current_characters(); - self.current_element = Element { - name: String::new(), - is_closing: true, - }; - } - - fn init_comment(&mut self) { - self.flush_current_characters(); - } - - fn emit_current_tag(&mut self) -> Option { - self.flush_links(); +impl Callback<(), usize> for &mut LinkExtractor { + fn handle_event( + &mut self, + event: CallbackEvent<'_>, + span: html5gum::Span, + ) -> Option<()> { + match event { + CallbackEvent::OpenStartTag { name } => { + self.current_element = String::from_utf8_lossy(name).into_owned(); + + // Update the current verbatim element name. + // + // Keeps track of the last verbatim element name, so that we can + // properly handle nested verbatim blocks. 
+ if self.filter_verbatim_here() && is_verbatim_elem(&self.current_element) { + self.verbatim_stack.push(self.current_element.clone()); + } + } + CallbackEvent::AttributeName { name } => { + self.current_attribute_name = String::from_utf8_lossy(name).into_owned(); + } + CallbackEvent::AttributeValue { value } => { + let value = String::from_utf8_lossy(value); + self.current_attributes + .entry(self.current_attribute_name.clone()) + .and_modify(|v| v.push_str(&value)) + .or_insert_with(|| Spanned { + value: value.into_owned(), + span, + }); + } + CallbackEvent::CloseStartTag { self_closing } => { + self.flush_links(); - if self.current_element.is_closing { - None - } else { - html5gum::naive_next_state(self.current_element.name.as_bytes()) + // Update the current verbatim element name. + // + // Keeps track of the last verbatim element name, so that we can + // properly handle nested verbatim blocks. + if self_closing && self.filter_verbatim_here() { + if let Some(last_verbatim) = self.verbatim_stack.last() { + if last_verbatim == &self.current_element { + self.verbatim_stack.pop(); + } + } + } + } + CallbackEvent::EndTag { .. } => { + // Update the current verbatim element name. + // + // Keeps track of the last verbatim element name, so that we can + // properly handle nested verbatim blocks. + if self.filter_verbatim_here() { + if let Some(last_verbatim) = self.verbatim_stack.last() { + if last_verbatim == &self.current_element { + self.verbatim_stack.pop(); + } + } + } + } + CallbackEvent::String { value } => { + if !self.filter_verbatim_here() { + // Extract links from the current string and add them to the links vector. + self.links.extend(extract_raw_uri_from_plaintext( + &String::from_utf8_lossy(value), + &OffsetSpanProvider { + offset: span.start, + inner: &self.span_provider, + }, + )); + } + } + CallbackEvent::Comment { .. } + | CallbackEvent::Doctype { .. } + | CallbackEvent::Error(_) => {} } + None } - - fn emit_current_doctype(&mut self) {} - - fn set_self_closing(&mut self) { - self.current_element.is_closing = true; - } - - fn set_force_quirks(&mut self) {} - - fn push_tag_name(&mut self, s: &[u8]) { - self.current_element - .name - .push_str(&String::from_utf8_lossy(s)); - } - - fn push_comment(&mut self, _: &[u8]) {} - - fn push_doctype_name(&mut self, _: &[u8]) {} - - fn init_doctype(&mut self) { - self.flush_current_characters(); - } - - fn init_attribute(&mut self) { - self.current_attribute_name.clear(); - } - - fn push_attribute_name(&mut self, s: &[u8]) { - self.current_attribute_name - .push_str(&String::from_utf8_lossy(s)); - } - - fn push_attribute_value(&mut self, s: &[u8]) { - let value = String::from_utf8_lossy(s); - self.current_attributes - .entry(self.current_attribute_name.clone()) - .and_modify(|v| v.push_str(&value)) - .or_insert_with(|| value.into_owned()); - } - - fn set_doctype_public_identifier(&mut self, _: &[u8]) {} - - fn set_doctype_system_identifier(&mut self, _: &[u8]) {} - - fn push_doctype_public_identifier(&mut self, _: &[u8]) {} - - fn push_doctype_system_identifier(&mut self, _: &[u8]) {} - - fn current_is_appropriate_end_tag_token(&mut self) -> bool { - self.current_element.is_closing && !self.current_element.name.is_empty() - } - - fn emit_current_comment(&mut self) {} } /// Extract unparsed URL strings from an HTML string. 
pub(crate) fn extract_html(buf: &str, include_verbatim: bool) -> Vec { - let mut extractor = LinkExtractor::new(include_verbatim); - let mut tokenizer = Tokenizer::new_with_emitter(buf, &mut extractor); + extract_html_with_span(buf, include_verbatim, SourceSpanProvider::from_input(buf)) +} + +pub(crate) fn extract_html_with_span( + buf: &str, + include_verbatim: bool, + span_provider: S, +) -> Vec { + let mut extractor = LinkExtractor::new(span_provider, include_verbatim); + let mut tokenizer = Tokenizer::new_with_emitter(buf, CallbackEmitter::new(&mut extractor)); assert!(tokenizer.next().is_none()); extractor .links @@ -362,14 +318,17 @@ pub(crate) fn extract_html(buf: &str, include_verbatim: bool) -> Vec { /// Extract fragments from id attributes within a HTML string. pub(crate) fn extract_html_fragments(buf: &str) -> HashSet { - let mut extractor = LinkExtractor::new(true); - let mut tokenizer = Tokenizer::new_with_emitter(buf, &mut extractor); + let span_provider = SourceSpanProvider::from_input(buf); + let mut extractor = LinkExtractor::new(span_provider, true); + let mut tokenizer = Tokenizer::new_with_emitter(buf, CallbackEmitter::new(&mut extractor)); assert!(tokenizer.next().is_none()); extractor.fragments } #[cfg(test)] mod tests { + use crate::types::uri::raw::span; + use super::*; const HTML_INPUT: &str = r#" @@ -403,6 +362,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4, 121), }]; let uris = extract_html(HTML_INPUT, false); @@ -416,26 +376,31 @@ mod tests { text: "https://example.com".to_string(), element: None, attribute: None, + span: span(4, 72), }, RawUri { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4, 121), }, RawUri { text: "https://foo.com".to_string(), element: None, attribute: None, + span: span(7, 9), }, RawUri { text: "http://bar.com/some/path".to_string(), element: None, attribute: None, + span: span(7, 29), }, RawUri { text: "https://baz.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(9, 18), }, ]; @@ -458,6 +423,7 @@ mod tests { text: "https://example.com/".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(2, 18), }]; let uris = extract_html(HTML_INPUT, false); @@ -489,6 +455,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4, 18), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -515,6 +482,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(5, 18), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -531,6 +499,7 @@ mod tests { text: "https://example.org".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(4, 18), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -553,6 +522,7 @@ mod tests { text: "tel:1234567890".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(8, 22), }]; let uris = extract_html(input, false); assert_eq!(uris, expected); @@ -575,6 +545,7 @@ mod tests { text: "mailto:foo@bar.com".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(8, 22), }]; let uris = extract_html(input, false); assert_eq!(uris, 
expected); @@ -614,16 +585,19 @@ mod tests { text: "/cdn-cgi/image/format=webp,width=640/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg".to_string(), element: Some("img".to_string()), attribute: Some("srcset".to_string()), + span: span(2, 26), }, RawUri { text: "/cdn-cgi/image/format=webp,width=750/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg".to_string(), element: Some("img".to_string()), attribute: Some("srcset".to_string()), + span: span(2, 26), }, RawUri { text: "/cdn-cgi/image/format=webp,width=3840/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg".to_string(), element: Some("img".to_string()), attribute: Some("src".to_string()), + span: span(2, 231), } ]; @@ -670,6 +644,7 @@ mod tests { text: "https://example.com".to_string(), element: Some("a".to_string()), attribute: Some("href".to_string()), + span: span(2, 22), }]; let uris = extract_html(input, false); diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 41d6b515ff..6aeeef505f 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -1,11 +1,14 @@ //! Extract links and fragments from markdown documents use std::collections::{HashMap, HashSet}; -use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeStream}; +use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset}; -use crate::{extract::plaintext::extract_raw_uri_from_plaintext, types::uri::raw::RawUri}; +use crate::{ + extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext}, + types::uri::raw::{OffsetSpanProvider, RawUri, SourceSpanProvider, SpanProvider as _}, +}; -use super::html::html5gum::{extract_html, extract_html_fragments}; +use super::html::html5gum::extract_html_fragments; /// Returns the default markdown extensions used by lychee. /// Sadly, `|` is not const for `Options` so we can't use a const global. @@ -14,15 +17,18 @@ fn md_extensions() -> Options { } /// Extract unparsed URL strings from a Markdown string. +#[allow(clippy::too_many_lines)] pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec { // In some cases it is undesirable to extract links from within code blocks, // which is why we keep track of entries and exits while traversing the input. let mut inside_code_block = false; let mut inside_link_block = false; - let parser = TextMergeStream::new(Parser::new_ext(input, md_extensions())); + let span_provider = SourceSpanProvider::from_input(input); + let parser = + TextMergeWithOffset::new(Parser::new_ext(input, md_extensions()).into_offset_iter()); parser - .filter_map(|event| match event { + .filter_map(|(event, span)| match event { // A link. Event::Start(Tag::Link { link_type, @@ -43,6 +49,9 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec Vec` LinkType::Autolink | // Email address in autolink like `` - LinkType::Email => - Some(extract_raw_uri_from_plaintext(&dest_url)), + LinkType::Email => { + let offset = match link_type { + // We don't know how the link starts, so don't offset the span. + LinkType::Reference | LinkType::CollapsedUnknown | LinkType::ShortcutUnknown => 0, + // These start all with `[` or `<`, so offset the span by `1`. 
+ LinkType::ReferenceUnknown | LinkType::Collapsed | LinkType::Shortcut | LinkType::Autolink | LinkType::Email => 1, + _ => { + debug_assert!(false, "unreachable"); + 0 + } + }; + Some(extract_raw_uri_from_plaintext(&dest_url, &OffsetSpanProvider { offset: span.start + offset, inner: &span_provider, })) + } // Wiki URL (`[[http://example.com]]`) LinkType::WikiLink { has_pothole: _ } => { inside_link_block = true; - //Ignore gitlab toc notation: https://docs.gitlab.com/user/markdown/#table-of-contents + // Ignore gitlab toc notation: https://docs.gitlab.com/user/markdown/#table-of-contents if ["_TOC_".to_string(), "TOC".to_string()].contains(&dest_url.to_string()) { return None; } @@ -73,6 +93,8 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec Vec Vec Vec { // This won't exclude verbatim links right now, because HTML gets passed in chunks // by pulldown_cmark. So excluding `
` and `<code>` is not handled right now.
-                Some(extract_html(&html, include_verbatim))
+                Some(extract_html_with_span(
+                    &html,
+                    include_verbatim,
+                    OffsetSpanProvider { offset: span.start, inner: &span_provider }
+                ))
             }
 
             // An inline code node.
             Event::Code(code) => {
                 if include_verbatim {
-                    Some(extract_raw_uri_from_plaintext(&code))
+                    // Inline code starts with '`', so offset the span by `1`.
+                    Some(extract_raw_uri_from_plaintext(
+                        &code,
+                        &OffsetSpanProvider { offset: span.start + 1, inner: &span_provider }
+                    ))
                 } else {
                     None
                 }
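
As an aside for reviewers (not part of the patch): the `+ 1` above relies on pulldown_cmark reporting the inline-code span including the surrounding backticks, while the emitted `Code` text excludes them. A minimal sketch of that assumption, with an illustrative input:

    // Illustrative only: why the inline-code span is shifted by one byte before
    // the text is handed to the plaintext extractor.
    fn inline_code_offset_sketch() {
        use pulldown_cmark::{Event, Options, Parser};

        let md = "see `https://bar.org` here";
        for (event, span) in Parser::new_ext(md, Options::empty()).into_offset_iter() {
            if let Event::Code(code) = event {
                // The reported span covers the backticks ...
                assert_eq!(&md[span.clone()], "`https://bar.org`");
                // ... while the code text starts one byte later.
                assert_eq!(&md[span.start + 1..span.end - 1], &*code);
            }
        }
    }
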
@@ -228,6 +262,8 @@ impl HeadingIdGenerator {
 
 #[cfg(test)]
 mod tests {
+    use crate::types::uri::raw::span;
+
     use super::*;
 
     const MD_INPUT: &str = r#"
@@ -272,11 +308,13 @@ or inline like `https://bar.org` for instance.
                 text: "https://foo.com".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(4, 19),
             },
             RawUri {
                 text: "http://example.com".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(18, 1),
             },
         ];
 
@@ -291,21 +329,25 @@ or inline like `https://bar.org` for instance.
                 text: "https://foo.com".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(4, 19),
             },
             RawUri {
                 text: "https://bar.com/123".to_string(),
                 element: None,
                 attribute: None,
+                span: span(11, 1),
             },
             RawUri {
                 text: "https://bar.org".to_string(),
                 element: None,
                 attribute: None,
+                span: span(14, 17),
             },
             RawUri {
                 text: "http://example.com".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(18, 1),
             },
         ];
 
@@ -377,6 +419,7 @@ $$
             text: "https://example.com/_/foo".to_string(),
             element: None,
             attribute: None,
+            span: span(1, 1),
         }];
         let uris = extract_markdown(markdown, true);
         assert_eq!(uris, expected);
@@ -389,6 +432,7 @@ $$
             text: "https://example.com/_".to_string(),
             element: None,
             attribute: None,
+            span: span(1, 1),
         }];
         let uris = extract_markdown(markdown, true);
         assert_eq!(uris, expected);
@@ -401,6 +445,7 @@ $$
             text: "https://example.com/destination".to_string(),
             element: Some("a".to_string()),
             attribute: Some("href".to_string()),
+            span: span(1, 3),
         }];
         let uris = extract_markdown(markdown, true);
         assert_eq!(uris, expected);
@@ -414,11 +459,13 @@ $$
                 text: "https://example.com/destination".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(1, 3),
             },
             RawUri {
                 text: "https://example.com/source".to_string(),
                 element: Some("a".to_string()),
                 attribute: Some("href".to_string()),
+                span: span(1, 38),
             },
         ];
         let uris = extract_markdown(markdown, true);
diff --git a/lychee-lib/src/extract/mod.rs b/lychee-lib/src/extract/mod.rs
index 91b48078d8..9c6aeec544 100644
--- a/lychee-lib/src/extract/mod.rs
+++ b/lychee-lib/src/extract/mod.rs
@@ -1,4 +1,7 @@
-use crate::types::{FileType, InputContent, uri::raw::RawUri};
+use crate::types::{
+    FileType, InputContent,
+    uri::raw::{RawUri, SourceSpanProvider},
+};
 
 pub mod html;
 pub mod markdown;
@@ -50,7 +53,10 @@ impl Extractor {
                     html::html5gum::extract_html(&input_content.content, self.include_verbatim)
                 }
             }
-            FileType::Plaintext => extract_raw_uri_from_plaintext(&input_content.content),
+            FileType::Plaintext => extract_raw_uri_from_plaintext(
+                &input_content.content,
+                &SourceSpanProvider::from_input(&input_content.content),
+            ),
         }
     }
 }
diff --git a/lychee-lib/src/extract/plaintext.rs b/lychee-lib/src/extract/plaintext.rs
index 95ff2e6927..fb998415a6 100644
--- a/lychee-lib/src/extract/plaintext.rs
+++ b/lychee-lib/src/extract/plaintext.rs
@@ -1,25 +1,42 @@
-use crate::{types::uri::raw::RawUri, utils::url};
+use crate::{
+    types::uri::raw::{RawUri, SpanProvider},
+    utils::url,
+};
 
 /// Extract unparsed URL strings from plaintext
-pub(crate) fn extract_raw_uri_from_plaintext(input: &str) -> Vec<RawUri> {
+pub(crate) fn extract_raw_uri_from_plaintext(
+    input: &str,
+    span_provider: &impl SpanProvider,
+) -> Vec<RawUri> {
     url::find_links(input)
-        .map(|uri| RawUri::from(uri.as_str()))
+        .map(|uri| RawUri {
+            text: uri.as_str().to_owned(),
+            element: None,
+            attribute: None,
+            span: span_provider.span(uri.start()),
+        })
         .collect()
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::types::uri::raw::{SourceSpanProvider, span};
+
     use super::*;
 
+    fn extract(input: &str) -> Vec<RawUri> {
+        extract_raw_uri_from_plaintext(input, &SourceSpanProvider::from_input(input))
+    }
+
     #[test]
     fn test_extract_local_links() {
         let input = "http://127.0.0.1/ and http://127.0.0.1:8888/ are local links.";
-        let links: Vec<RawUri> = extract_raw_uri_from_plaintext(input);
+        let links: Vec<RawUri> = extract(input);
         assert_eq!(
             links,
             [
-                RawUri::from("http://127.0.0.1/"),
-                RawUri::from("http://127.0.0.1:8888/")
+                RawUri::from(("http://127.0.0.1/", span(1, 1))),
+                RawUri::from(("http://127.0.0.1:8888/", span(1, 23))),
             ]
         );
     }
@@ -27,9 +44,9 @@ mod tests {
     #[test]
     fn test_extract_link_at_end_of_line() {
         let input = "https://www.apache.org/licenses/LICENSE-2.0\n";
-        let uri = RawUri::from(input.trim_end());
+        let uri = RawUri::from((input.trim_end(), span(1, 1)));
 
-        let uris: Vec<RawUri> = extract_raw_uri_from_plaintext(input);
+        let uris: Vec<RawUri> = extract(input);
         assert_eq!(vec![uri], uris);
     }
 }
diff --git a/lychee-lib/src/types/uri/raw.rs b/lychee-lib/src/types/uri/raw.rs
index 3ad51f2cf8..9c1aa37b0e 100644
--- a/lychee-lib/src/types/uri/raw.rs
+++ b/lychee-lib/src/types/uri/raw.rs
@@ -1,4 +1,4 @@
-use std::fmt::Display;
+use std::{fmt::Display, num::NonZeroUsize};
 
 /// A raw URI that got extracted from a document with a fuzzy parser.
 /// Note that this can still be invalid according to stricter URI standards
@@ -17,6 +17,8 @@ pub struct RawUri {
     /// that will be checked e.g. by trying to filter out links that were found
     /// in unwanted attributes like `srcset` or `manifest`.
     pub attribute: Option<String>,
+    /// The position of the URI in the document.
+    pub span: RawUriSpan,
 }
 
 impl Display for RawUri {
@@ -25,12 +27,124 @@ impl Display for RawUri {
     }
 }
 
-impl From<&str> for RawUri {
-    fn from(text: &str) -> Self {
+#[cfg(test)]
+impl From<(&str, RawUriSpan)> for RawUri {
+    fn from((text, span): (&str, RawUriSpan)) -> Self {
         RawUri {
             text: text.to_string(),
             element: None,
             attribute: None,
+            span,
         }
     }
 }
+
+/// A span of a [`RawUri`] in the document.
+///
+/// The span can be used to give more precise error messages.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct RawUriSpan {
+    /// The line of the URI.
+    ///
+    /// The line is 1-based.
+    pub line: NonZeroUsize,
+    /// The column of the URI if computable.
+    ///
+    /// The column is 1-based.
+    /// This is `None` if the column can't be computed exactly,
+    /// e.g. when it comes from the `html5ever` parser.
+    pub column: Option<NonZeroUsize>,
+}
+
+/// Test helper to create [`RawUriSpan`]s easily.
+#[cfg(test)]
+pub const fn span(line: usize, column: usize) -> RawUriSpan {
+    RawUriSpan {
+        line: NonZeroUsize::new(line).unwrap(),
+        column: Some(NonZeroUsize::new(column).unwrap()),
+    }
+}
+
+/// A trait for calculating a [`RawUriSpan`] at a given byte offset in the document.
+///
+/// If you have a document and want spans with absolute positions, use [`SourceSpanProvider`].
+/// If you start inside a document at a given offset, use [`OffsetSpanProvider`].
+pub trait SpanProvider {
+    /// Compute the [`RawUriSpan`] at a given byte offset in the document.
+    fn span(&self, offset: usize) -> RawUriSpan;
+}
+
+/// A [`SpanProvider`] which calculates spans depending on the input lines.
+///
+/// Precomputes line start offsets so that constructing [`RawUriSpan`]s is faster.
+/// If you start inside a document at a given offset, consider using [`OffsetSpanProvider`].
+#[derive(Clone, Debug)]
+pub struct SourceSpanProvider<'a> {
+    /// The computed map from line number to offset in the document.
+    line_starts: Vec<usize>,
+    /// The input document.
+    ///
+    /// This is used to compute column information, since we can't rely on each character being a
+    /// single byte long.
+    input: &'a str,
+}
+
+impl<'a> SourceSpanProvider<'a> {
+    /// Create a [`SpanProvider`] from the given document.
+    ///
+    /// If the input is part of a larger document, consider using [`OffsetSpanProvider`] instead.
+    ///
+    /// This function isn't just a simple constructor: it scans the whole input for line breaks,
+    /// so only call it when the resulting provider will actually be used.
+    pub fn from_input(input: &'a str) -> Self {
+        // FIXME: Consider making this lazy?
+        let line_starts: Vec<_> = core::iter::once(0)
+            .chain(input.match_indices('\n').map(|(i, _)| i + 1))
+            .collect();
+        Self { line_starts, input }
+    }
+}
+
+impl SpanProvider for SourceSpanProvider<'_> {
+    fn span(&self, offset: usize) -> RawUriSpan {
+        const ONE: NonZeroUsize = NonZeroUsize::MIN;
+        let line = match self.line_starts.binary_search(&offset) {
+            Ok(i) => i,
+            Err(i) => i - 1,
+        };
+        // The index comes from the binary_search above (at most `line_starts.len()`) and is
+        // decremented in the `Err` case, so it is always in bounds and this shouldn't panic.
+        let line_offset = self.line_starts[line];
+        let column = self
+            .input
+            .get(line_offset..offset)
+            .or_else(|| self.input.get(line_offset..))
+            // columns are 1-based
+            .map(|v| ONE.saturating_add(v.chars().count()));
+
+        RawUriSpan {
+            // lines are 1-based
+            line: ONE.saturating_add(line),
+            column,
+        }
+    }
+}
+
+/// A [`SpanProvider`] which starts at a given offset in the document.
+///
+/// All given offsets are shifted by the stored amount before the resulting
+/// [`RawUriSpan`] is computed with the inner [`SpanProvider`].
+#[derive(Clone, Debug)]
+pub struct OffsetSpanProvider<'a, T: SpanProvider = SourceSpanProvider<'a>> {
+    /// The byte offset in the document by which all given offsets are shifted before the resulting
+    /// [`RawUriSpan`] is computed with the inner [`SpanProvider`].
+    pub offset: usize,
+    /// The inner [`SpanProvider`] which will be used to determine the spans.
+    pub inner: &'a T,
+}
+
+impl<T: SpanProvider> SpanProvider for OffsetSpanProvider<'_, T> {
+    fn span(&self, offset: usize) -> RawUriSpan {
+        self.inner.span(self.offset + offset)
+    }
+}
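
For reviewers, a small usage sketch of the two providers introduced above, written as a hypothetical test in raw.rs (not part of this diff; the input and offsets are illustrative):

    #[test]
    fn offset_provider_delegates_to_source_provider() {
        use std::num::NonZeroUsize;

        let input = "first line\nsecond https://example.com line";
        let source = SourceSpanProvider::from_input(input);

        // Byte offset 18 is the `h` of `https://…`: line 2, column 8 (both 1-based).
        let span = source.span(18);
        assert_eq!(span.line, NonZeroUsize::new(2).unwrap());
        assert_eq!(span.column, NonZeroUsize::new(8));

        // An OffsetSpanProvider shifts offsets before delegating, e.g. when a nested
        // parser only ever saw the substring of `input` that starts at byte 11.
        let offset = OffsetSpanProvider { offset: 11, inner: &source };
        assert_eq!(offset.span(7), span);
    }
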
diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs
index 6b57b9ee8c..cdf719f9cd 100644
--- a/lychee-lib/src/utils/request.rs
+++ b/lychee-lib/src/utils/request.rs
@@ -206,8 +206,24 @@ fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf
 
 #[cfg(test)]
 mod tests {
+    use std::num::NonZeroUsize;
+
+    use crate::types::uri::raw::RawUriSpan;
+
     use super::*;
 
+    fn raw_uri(text: &'static str) -> RawUri {
+        RawUri {
+            text: text.to_string(),
+            element: None,
+            attribute: None,
+            span: RawUriSpan {
+                line: NonZeroUsize::MAX,
+                column: None,
+            },
+        }
+    }
+
     #[test]
     fn test_is_anchor() {
         assert!(is_anchor("#anchor"));
@@ -226,7 +242,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("relative.html")];
+        let uris = vec![raw_uri("relative.html")];
         let requests = create(uris, &source, None, Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -242,7 +258,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("https://another.com/page")];
+        let uris = vec![raw_uri("https://another.com/page")];
         let requests = create(uris, &source, None, Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -258,7 +274,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("/root-relative")];
+        let uris = vec![raw_uri("/root-relative")];
         let requests = create(uris, &source, None, Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -274,7 +290,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("../parent")];
+        let uris = vec![raw_uri("../parent")];
         let requests = create(uris, &source, None, Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -290,7 +306,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("#fragment")];
+        let uris = vec![raw_uri("#fragment")];
         let requests = create(uris, &source, None, Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -306,7 +322,7 @@ mod tests {
         let root_dir = PathBuf::from("/tmp/lychee");
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("relative.html")];
+        let uris = vec![raw_uri("relative.html")];
         let requests = create(uris, &source, Some(&root_dir), None, None);
 
         assert_eq!(requests.len(), 1);
@@ -322,7 +338,7 @@ mod tests {
         let root_dir = PathBuf::from("/tmp/lychee");
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("https://another.com/page")];
+        let uris = vec![raw_uri("https://another.com/page")];
         let requests = create(uris, &source, Some(&root_dir), None, None);
 
         assert_eq!(requests.len(), 1);
@@ -338,7 +354,7 @@ mod tests {
         let root_dir = PathBuf::from("/tmp/lychee");
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("/root-relative")];
+        let uris = vec![raw_uri("/root-relative")];
         let requests = create(uris, &source, Some(&root_dir), None, None);
 
         assert_eq!(requests.len(), 1);
@@ -354,7 +370,7 @@ mod tests {
         let root_dir = PathBuf::from("/tmp/lychee");
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("../parent")];
+        let uris = vec![raw_uri("../parent")];
         let requests = create(uris, &source, Some(&root_dir), None, None);
 
         assert_eq!(requests.len(), 1);
@@ -370,7 +386,7 @@ mod tests {
         let root_dir = PathBuf::from("/tmp/lychee");
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("#fragment")];
+        let uris = vec![raw_uri("#fragment")];
         let requests = create(uris, &source, Some(&root_dir), None, None);
 
         assert_eq!(requests.len(), 1);
@@ -387,7 +403,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("relative.html")];
+        let uris = vec![raw_uri("relative.html")];
         let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -404,7 +420,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("https://another.com/page")];
+        let uris = vec![raw_uri("https://another.com/page")];
         let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -421,7 +437,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("/root-relative")];
+        let uris = vec![raw_uri("/root-relative")];
         let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -438,7 +454,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("../parent")];
+        let uris = vec![raw_uri("../parent")];
         let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -455,7 +471,7 @@ mod tests {
         let base = Base::try_from("https://example.com/path/page.html").unwrap();
         let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
 
-        let uris = vec![RawUri::from("#fragment")];
+        let uris = vec![raw_uri("#fragment")];
         let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
 
         assert_eq!(requests.len(), 1);
@@ -470,7 +486,7 @@ mod tests {
     fn test_no_base_url_resolution() {
         let source = InputSource::String(String::new());
 
-        let uris = vec![RawUri::from("https://example.com/page")];
+        let uris = vec![raw_uri("https://example.com/page")];
         let requests = create(uris, &source, None, None, None);
 
         assert_eq!(requests.len(), 1);
@@ -487,7 +503,7 @@ mod tests {
         let input_source = InputSource::FsPath(PathBuf::from("page.html"));
 
         let actual = create_request(
-            &RawUri::from("file.html"),
+            &raw_uri("file.html"),
             &input_source,
             None,
             Some(&base),
@@ -516,7 +532,7 @@ mod tests {
 
         // Use an absolute path that's outside the base directory
         let actual = create_request(
-            &RawUri::from("/usr/local/share/doc/example.html"),
+            &raw_uri("/usr/local/share/doc/example.html"),
             &input_source,
             None,
             Some(&base),
@@ -543,7 +559,7 @@ mod tests {
         let base = Base::Local(PathBuf::from("/tmp/lychee"));
         let source = InputSource::String(String::new());
 
-        let raw_uri = RawUri::from("relative.html");
+        let raw_uri = raw_uri("relative.html");
         let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
 
         assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
@@ -554,7 +570,7 @@ mod tests {
         let base = Base::Local(PathBuf::from("/tmp/lychee"));
         let source = InputSource::String(String::new());
 
-        let raw_uri = RawUri::from("absolute.html");
+        let raw_uri = raw_uri("absolute.html");
         let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
 
         assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
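
Reviewer note: a rough sketch of how a caller could render the new span in diagnostics (hypothetical helper, not part of this diff; `source` stands for whatever name the checked input is reported under):

    // Hypothetical formatting helper built on the fields added in this PR.
    fn format_location(source: &str, uri: &RawUri) -> String {
        match uri.span.column {
            // e.g. "README.md:4:19"
            Some(column) => format!("{source}:{}:{}", uri.span.line, column),
            // Column unknown, e.g. for spans coming from the html5ever extractor.
            None => format!("{source}:{}", uri.span.line),
        }
    }
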