From 0334e03ea73e24d6b7e3cec7419aad4962f64d34 Mon Sep 17 00:00:00 2001 From: Jeremy Roman Date: Tue, 31 Oct 2023 02:44:56 -0400 Subject: [PATCH] Fix duplicate descriptions bug Various attributes (with IDs matching patterns such as attr-dim-*, attr-fae-*, attr-fe-*, and attr-media-*) have duplicate definition links (for various elements they apply to) which were being concatenated as though they were multiple distinct descriptions. These shouldn't be duplicated (these descriptions are by definition identical). This was a regression introduced in the Rust preprocessor rewrite of 24db54a96c9a40ee3890b6195093a034a90e9cf8. Fixes https://github.com/whatwg/html/issues/9889. --- src/annotate_attributes.rs | 63 +++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/src/annotate_attributes.rs b/src/annotate_attributes.rs index 0072084..def2f9d 100644 --- a/src/annotate_attributes.rs +++ b/src/annotate_attributes.rs @@ -1,6 +1,6 @@ //! Augments the content attribute list for each element with a description found in the Attributes table. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::io; use std::rc::Rc; @@ -116,6 +116,11 @@ impl Processor { _ => continue, }; + // If a single row describes the same element multiple times, we don't need to repeat it. + // StrTendril doesn't have logical interior mutability, so this Clippy warning is overzealous. + #[allow(clippy::mutable_key_type)] + let mut seen_this_row: HashSet<StrTendril> = HashSet::new(); + // These will be strings like "attr-input-maxlength", which identify particular element-attribute pairs. 
let data_x = QualName::new(None, ns!(), LocalName::from("data-x")); for attr_key in keys_td @@ -124,6 +129,14 @@ impl Processor { .iter() .filter_map(|c| c.get_attribute(&data_x).filter(|v| !v.is_empty())) { + // If this row describes the same attribute, with the same + // identifier, for multiple elements (like attr-fae-form and + // attr-dim-width), these aren't actually distinct descriptions + // and we need not join them. + if !seen_this_row.insert(attr_key.clone()) { + continue; + } + // Find the comment, if one exists, and extract its contents. let description = description_td.children.borrow(); let mut variant_comment = None; @@ -463,6 +476,54 @@ mod tests {
nameaAnchor name
nameaName of the anchor +
+ "#.trim() + ); + Ok(()) + } + + #[tokio::test] + async fn test_identical_links() -> io::Result<()> { + // This checks the same identifier can be linked multiple times without + // repeating the description. + let document = parse_document_async( + r#" +

The img element

+
+
Content attributes +
width +
+

The video element

+
+
Content attributes +
width +
+

Attributes

+ +
widthimg; videoHorizontal dimension +
+ "#.trim().as_bytes()).await?; + let mut proc = Processor::new(); + dom_utils::scan_dom(&document, &mut |h| proc.visit(h)); + proc.apply().await?; + assert_eq!( + serialize_for_test(&[document]), + r#" +

The img element

+
+
Content attributes +
width + — Horizontal dimension +
+

The video element

+
+
Content attributes +
width + — Horizontal dimension +
+

Attributes

+ +
widthimg; videoHorizontal dimension
"#.trim() );