diff --git a/README.md b/README.md index c493cd14fb..bda7ed5a4f 100644 --- a/README.md +++ b/README.md @@ -460,7 +460,7 @@ Options: which have multiple extensions, e.g. HTML files with both .html and .htm extensions, you need to specify both extensions explicitly. - [default: md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,txt] + [default: md,mkd,mdx,mdown,mdwn,mkdn,mkdown,markdown,html,htm,css,txt] -f, --format Output format of final status report diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 857c3f6b64..b5bb70c142 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -4,7 +4,7 @@ use crate::parse::parse_base; use crate::verbosity::Verbosity; use anyhow::{Context, Error, Result, anyhow}; use clap::builder::PossibleValuesParser; -use clap::{Parser, arg, builder::TypedValueParser}; +use clap::{Parser, builder::TypedValueParser}; use const_format::{concatcp, formatcp}; use http::{ HeaderMap, diff --git a/lychee-lib/src/extract/css.rs b/lychee-lib/src/extract/css.rs new file mode 100644 index 0000000000..4aa048fb4b --- /dev/null +++ b/lychee-lib/src/extract/css.rs @@ -0,0 +1,439 @@ +//! Extract URLs from CSS content +//! +//! This module extracts URLs from CSS files and ` + +"#; + let input_content = InputContent::from_string(input, FileType::Html); + let extractor = Extractor::new(false, false, false); + let raw_uris = extractor.extract(&input_content); + assert_eq!(raw_uris, vec![css_url("./lychee.png", span(5, 32))]); + } + + #[test] + fn test_extract_css_from_css_file() { + let input = r#" +.example { + background-image: url("./image.png"); + background: url('/absolute/path.jpg'); +} +@import url(https://example.com/style.css); +"#; + let input_content = InputContent::from_string(input, FileType::Css); + let extractor = Extractor::new(false, false, false); + let raw_uris = extractor.extract(&input_content); + assert_eq!( + raw_uris, + vec![ + css_url("./image.png", span(3, 23)), + css_url("/absolute/path.jpg", span(4, 17)), + css_url("https://example.com/style.css", span(6, 9)), + ] + ); + } + + #[test] + fn test_extract_multiple_css_urls_from_style_tag() { + let input = r#" + + + +"#; + let input_content = InputContent::from_string(input, FileType::Html); + let extractor = Extractor::new(false, false, false); + let raw_uris = extractor.extract(&input_content); + + assert_eq!( + raw_uris, + vec![ + css_url("./bg.png", span(5, 32)), + css_url("../fonts/font.woff2", span(8, 19)), + ] + ); + } + + fn css_url(text: &str, span: RawUriSpan) -> RawUri { + RawUri { + text: text.into(), + element: Some("style".into()), + attribute: Some("url".into()), + span, + } + } } diff --git a/lychee-lib/src/types/file.rs b/lychee-lib/src/types/file.rs index 653f7323bd..29f01c021b 100644 --- a/lychee-lib/src/types/file.rs +++ b/lychee-lib/src/types/file.rs @@ -81,6 +81,7 @@ impl From for FileExtensions { match file_type { FileType::Html => FileType::html_extensions(), FileType::Markdown => FileType::markdown_extensions(), + FileType::Css => FileType::css_extensions(), FileType::Plaintext => FileType::plaintext_extensions(), } } @@ -115,11 +116,24 @@ pub enum FileType { Html, /// File in Markdown format Markdown, + /// File in CSS format + Css, /// Generic text file without syntax-specific parsing #[default] Plaintext, } +impl std::fmt::Display for FileType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FileType::Html => write!(f, "HTML"), + FileType::Markdown => write!(f, "Markdown"), + FileType::Css => write!(f, "CSS"), + FileType::Plaintext => write!(f, "plaintext"), + } + } +} + impl FileType { /// All known Markdown extensions const MARKDOWN_EXTENSIONS: &'static [&'static str] = &[ @@ -129,6 +143,9 @@ impl FileType { /// All known HTML extensions const HTML_EXTENSIONS: &'static [&'static str] = &["htm", "html"]; + /// All known CSS extensions + const CSS_EXTENSIONS: &'static [&'static str] = &["css"]; + /// All known plaintext extensions const PLAINTEXT_EXTENSIONS: &'static [&'static str] = &["txt"]; @@ -138,6 +155,7 @@ impl FileType { let mut extensions = FileExtensions::empty(); extensions.extend(Self::markdown_extensions()); extensions.extend(Self::html_extensions()); + extensions.extend(Self::css_extensions()); extensions.extend(Self::plaintext_extensions()); extensions } @@ -160,6 +178,15 @@ impl FileType { .collect() } + /// All known CSS extensions + #[must_use] + pub fn css_extensions() -> FileExtensions { + Self::CSS_EXTENSIONS + .iter() + .map(|&s| s.to_string()) + .collect() + } + /// All known plaintext extensions #[must_use] pub fn plaintext_extensions() -> FileExtensions { @@ -177,6 +204,8 @@ impl FileType { Some(Self::Markdown) } else if Self::HTML_EXTENSIONS.contains(&ext.as_str()) { Some(Self::Html) + } else if Self::CSS_EXTENSIONS.contains(&ext.as_str()) { + Some(Self::Css) } else if Self::PLAINTEXT_EXTENSIONS.contains(&ext.as_str()) { Some(Self::Plaintext) } else { @@ -240,12 +269,14 @@ mod tests { assert!(extensions.contains("html")); assert!(extensions.contains("markdown")); assert!(extensions.contains("htm")); + assert!(extensions.contains("css")); // Test that the count matches our static arrays let all_extensions: Vec<_> = extensions.into(); assert_eq!( all_extensions.len(), FileType::MARKDOWN_EXTENSIONS.len() + FileType::HTML_EXTENSIONS.len() + + FileType::CSS_EXTENSIONS.len() + FileType::PLAINTEXT_EXTENSIONS.len() ); } diff --git a/lychee-lib/src/utils/fragment_checker.rs b/lychee-lib/src/utils/fragment_checker.rs index 8c7fac6cd5..b737e1c804 100644 --- a/lychee-lib/src/utils/fragment_checker.rs +++ b/lychee-lib/src/utils/fragment_checker.rs @@ -131,8 +131,8 @@ impl FragmentChecker { let extractor = match file_type { FileType::Markdown => extract_markdown_fragments, FileType::Html => extract_html_fragments, - FileType::Plaintext => { - info!("Skipping fragment check for {url} within a plaintext file"); + FileType::Css | FileType::Plaintext => { + info!("Skipping fragment check for {url} within a {file_type} file"); return Ok(true); } };