From 8defd1062bb8b7b5050fbf5aa4d8a526e271821e Mon Sep 17 00:00:00 2001
From: Kornel <kornel@geekhood.net>
Date: Sun, 2 Jul 2023 16:09:33 +0100
Subject: [PATCH] Allocate attr name HashSet only if necessary

---
 benches/bench.rs       | 13 +++++++++++++
 src/reader/indexset.rs | 16 ++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/benches/bench.rs b/benches/bench.rs
index c2065b1d..d7f74279 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -15,6 +15,19 @@ fn read(bencher: &mut Bencher) {
     });
 }
 
+#[bench]
+fn read_lots_attrs(bencher: &mut Bencher) {
+    let xml = r#"<x
+        a0="" b0="" c0="" d0="" e0="" f0="" g0="" h0="" i0="" j0="" k0="" l0="" m0="" n0="" o0="" p0="" q0="" r0="" s0="" t0="" u0="" v0="" w0="" x0="" y0="" z0="" a1="" b1="" c1="" d1="" e1="" f1="" g1="" h1="" i1="" j1="" k1="" l1="" m1="" n1="" o1="" p1="" q1="" r1="" s1="" t1="" u1="" v1="" w1="" x1="" y1="" z1="" a2="" b2="" c2="" d2="" e2="" f2="" g2="" h2="" i2="" j2="" k2="" l2="" m2="" n2="" o2="" p2="" q2="" r2="" s2="" t2="" u2="" v2="" w2="" x2="" y2="" z2="" a3="" b3="" c3="" d3="" e3="" f3="" g3="" h3="" i3="" j3="" k3="" l3="" m3="" n3="" o3="" p3="" q3="" r3="" s3="" t3="" u3="" v3="" w3="" x3="" y3="" z3="" a4="" b4="" c4="" d4="" e4="" f4="" g4="" h4="" i4="" j4="" k4="" l4="" m4="" n4="" o4="" p4="" q4="" r4="" s4="" t4="" u4="" v4="" w4="" x4="" y4="" z4="" a5="" b5="" c5="" d5="" e5="" f5="" g5="" h5="" i5="" j5="" k5="" l5="" m5="" n5="" o5="" p5="" q5="" r5="" s5="" t5="" u5="" v5="" w5="" x5="" y5="" z5="" a6="" b6="" c6="" d6="" e6="" f6="" g6="" h6="" i6="" j6="" k6="" l6="" m6="" n6="" o6="" p6="" q6="" r6="" s6="" t6="" u6="" v6="" w6="" x6="" y6="" z6="" a7="" b7="" c7="" d7="" e7="" f7="" g7="" h7="" i7="" j7="" k7="" l7="" m7="" n7="" o7="" p7="" q7="" r7="" s7="" t7="" u7="" v7="" w7="" x7="" y7="" z7="" a8="" b8="" c8="" d8="" e8="" f8="" g8="" h8="" i8="" j8="" k8="" l8="" m8="" n8="" o8="" p8="" q8="" r8="" s8="" t8="" u8="" v8="" w8="" x8="" y8="" z8="" a9="" b9="" c9="" d9="" e9="" f9="" g9="" h9="" i9="" j9="" k9="" l9="" m9="" n9="" o9="" p9="" q9="" r9="" s9="" t9="" u9="" v9="" w9="" x9="" y9="" z9="" a10="" b10="" c10="" d10="" e10="" f10="" g10="" h10="" i10="" j10="" k10="" l10="" m10="" n10="" o10="" p10="" q10="" r10="" s10="" t10="" u10="" v10="" w10="" x10="" y10="" z10="" a11="" b11="" c11="" d11="" e11="" f11="" g11="" h11="" i11="" j11="" k11="" l11="" m11="" n11="" o11="" p11="" q11="" r11="" s11="" t11="" u11="" v11="" w11="" x11="" y11="" z11="" a12="" b12="" c12="" d12="" e12="" f12="" g12="" h12="" i12="" 
j12="" k12="" l12="" m12="" n12="" o12="" p12="" q12="" r12="" s12="" t12="" u12="" v12="" w12="" x12="" y12="" z12="" a13="" b13="" c13="" d13="" e13="" f13="" g13="" h13="" i13="" j13="" k13="" l13="" m13="" n13="" o13="" p13="" q13="" r13="" s13="" t13="" u13="" v13="" w13="" x13="" y13="" z13="" a14="" b14="" c14="" d14="" e14="" f14="" g14="" h14="" i14="" j14="" k14="" l14="" m14="" n14="" o14="" p14="" q14="" r14="" s14="" t14="" u14="" v14="" w14="" x14="" y14="" z14="" a15="" b15="" c15="" d15="" e15="" f15="" g15="" h15="" i15="" j15="" k15="" l15="" m15="" n15="" o15="" p15="" q15="" r15="" s15="" t15="" u15="" v15="" w15="" x15="" y15="" z15="" a16="" b16="" c16="" d16="" e16="" f16="" g16="" h16="" i16="" j16="" k16="" l16="" m16="" n16="" o16="" p16="" q16="" r16="" s16="" t16="" u16="" v16="" w16="" x16="" y16="" z16="" a17="" b17="" c17="" d17="" e17="" f17="" g17="" h17="" i17="" j17="" k17="" l17="" m17="" n17="" o17="" p17="" q17="" r17="" s17="" t17="" u17="" v17="" w17="" x17="" y17="" z17="" a18="" b18="" c18="" d18="" e18="" f18="" g18="" h18="" i18="" j18="" k18="" l18="" m18="" n18="" o18="" p18="" q18="" r18="" s18="" t18="" u18="" v18="" w18="" x18="" y18="" z18="" a19="" b19="" c19="" d19="" e19="" f19="" g19="" h19="" i19="" j19="" k19="" l19="" m19="" n19="" o19="" p19="" q19="" r19="" s19="" t19="" u19="" v19="" w19="" x19="" y19="" z19="" a20="" b20="" c20="" d20="" e20="" f20="" g20="" h20="" i20="" j20="" k20="" l20="" m20="" n20="" o20="" p20="" q20="" r20="" s20="" t20="" u20="" v20="" w20="" x20="" y20="" z20="" a21="" b21="" c21="" d21="" e21="" f21="" g21="" h21="" i21="" j21="" k21="" l21="" m21="" n21="" o21="" p21="" q21="" r21="" s21="" t21="" u21="" v21="" w21="" x21="" y21="" z21="" a22="" b22="" c22="" d22="" e22="" f22="" g22="" h22="" i22="" j22="" k22="" l22="" m22="" n22="" o22="" p22="" q22="" r22="" s22="" t22="" u22="" v22="" w22="" x22="" y22="" z22="" a23="" b23="" c23="" d23="" e23="" f23="" g23="" h23="" 
i23="" j23="" k23="" l23="" m23="" n23="" o23="" p23="" q23="" r23="" s23="" t23="" u23="" v23="" w23="" x23="" y23="" z23="" a24="" b24="" c24="" d24="" e24="" f24="" g24="" h24="" i24="" j24="" k24="" l24="" m24="" n24="" o24="" p24="" q24="" r24="" s24="" t24="" u24="" v24="" w24="" x24="" y24="" z24="" a25="" b25="" c25="" d25="" e25="" f25="" g25="" h25="" i25="" j25="" k25="" l25="" m25="" n25="" o25="" p25="" q25="" r25="" s25="" t25="" u25="" v25="" w25="" x25="" y25="" z25="" a26="" b26="" c26="" d26="" e26="" f26="" g26="" h26="" i26="" j26="" k26="" l26="" m26="" n26="" o26="" p26="" q26="" r26="" s26="" t26="" u26="" v26="" w26="" x26="" y26="" z26="" a27="" b27="" c27="" d27="" e27="" f27="" g27="" h27="" i27="" j27="" k27="" l27="" m27="" n27="" o27="" p27="" q27="" r27="" s27="" t27="" u27="" v27="" w27="" x27="" y27="" z27="" a28="" b28="" c28="" d28="" e28="" f28="" g28="" h28="" i28="" j28="" k28="" l28="" m28="" n28="" o28="" p28="" q28="" r28="" s28="" t28="" u28="" v28="" w28="" x28="" y28="" z28="" a29="" b29="" c29="" d29="" e29="" f29="" g29="" h29="" i29="" j29="" k29="" l29="" m29="" n29="" o29="" p29="" q29="" r29="" s29="" t29="" u29="" v29="" w29="" x29="" y29="" z29="" a30="" b30="" c30="" d30="" e30="" f30="" g30="" h30="" i30="" j30="" k30="" l30="" m30="" n30="" o30="" p30="" q30="" r30="" s30="" t30="" u30="" v30="" w30="" x30="" y30="" z30="" a31="" b31="" c31="" d31="" e31="" f31="" g31="" h31="" i31="" j31="" k31="" l31="" m31="" n31="" o31="" p31="" q31="" r31="" s31="" t31="" u31="" v31="" w31="" x31="" y31="" z31="" a32="" b32="" c32="" d32="" e32="" f32="" g32="" h32="" i32="" j32="" k32="" l32="" m32="" n32="" o32="" p32="" q32="" r32="" s32="" t32="" u32="" v32="" w32="" x32="" y32="" z32="" a33="" b33="" c33="" d33="" e33="" f33="" g33="" h33="" i33="" j33="" k33="" l33="" m33="" n33="" o33="" p33="" q33="" r33="" s33="" t33="" u33="" v33="" w33="" x33="" y33="" z33="" a34="" b34="" c34="" d34="" e34="" f34="" g34="" 
h34="" i34="" j34="" k34="" l34="" m34="" n34="" o34="" p34="" q34="" r34="" s34="" t34="" u34="" v34="" w34="" x34="" y34="" z34="" a35="" b35="" c35="" d35="" e35="" f35="" g35="" h35="" i35="" j35="" k35="" l35="" m35="" n35="" o35="" p35="" q35="" r35="" s35="" t35="" u35="" v35="" w35="" x35="" y35="" z35="" a36="" b36="" c36="" d36="" e36="" f36="" g36="" h36="" i36="" j36="" k36="" l36="" m36="" n36="" o36="" p36="" q36="" r36="" s36="" t36="" u36="" v36="" w36="" x36="" y36="" z36="" a37="" b37="" c37="" d37="" e37="" f37="" g37="" h37="" i37="" j37="" k37="" l37="" m37="" n37="" o37="" p37="" q37="" r37="" s37="" t37="" u37="" v37="" w37="" x37="" y37="" z37="" a38="" b38="" c38="" d38="" e38="" f38="" g38="" h38="" i38="" j38="" k38="" l38="" m38="" n38="" o38="" p38="" q38="" r38="" s38="" t38="" u38="" v38="" w38="" x38="" y38="" z38="" a39="" b39="" c39="" d39="" e39="" f39="" g39="" h39="" i39="" j39="" k39="" l39="" m39="" n39="" o39="" p39="" q39="" r39="" s39="" t39="" u39="" v39="" w39="" x39="" y39="" z39="" a40="" b40="" c40="" d40="" e40="" f40="" g40="" h40="" i40="" j40="" k40="" l40="" m40="" n40="" o40="" p40="" q40="" r40="" s40="" t40="" u40="" v40="" w40="" x40="" y40="" z40="" a41="" b41="" c41="" d41="" e41="" f41="" g41="" h41="" i41="" j41="" k41="" l41="" m41="" n41="" o41="" p41="" q41="" r41="" s41="" t41="" u41="" v41="" w41="" x41="" y41="" z41="" a42="" b42="" c42="" d42="" e42="" f42="" g42="" h42="" i42="" j42="" k42="" l42="" m42="" n42="" o42="" p42="" q42="" r42="" s42="" t42="" u42="" v42="" w42="" x42="" y42="" z42="" a43="" b43="" c43="" d43="" e43="" f43="" g43="" h43="" i43="" j43="" k43="" l43="" m43="" n43="" o43="" p43="" q43="" r43="" s43="" t43="" u43="" v43="" w43="" x43="" y43="" z43="" a44="" b44="" c44="" d44="" e44="" f44="" g44="" h44="" i44="" j44="" k44="" l44="" m44="" n44="" o44="" p44="" q44="" r44="" s44="" t44="" u44="" v44="" w44="" x44="" y44="" z44="" a45="" b45="" c45="" d45="" e45="" f45="" 
g45="" h45="" i45="" j45="" k45="" l45="" m45="" n45="" o45="" p45="" q45="" r45="" s45="" t45="" u45="" v45="" w45="" x45="" y45="" z45="" a46="" b46="" c46="" d46="" e46="" f46="" g46="" h46="" i46="" j46="" k46="" l46="" m46="" n46="" o46="" p46="" q46="" r46="" s46="" t46="" u46="" v46="" w46="" x46="" y46="" z46="" a47="" b47="" c47="" d47="" e47="" f47="" g47="" h47="" i47="" j47="" k47="" l47="" m47="" n47="" o47="" p47="" q47="" r47="" s47="" t47="" u47="" v47="" w47="" x47="" y47="" z47="" a48="" b48="" c48="" d48="" e48="" f48="" g48="" h48="" i48="" j48="" k48="" l48="" m48="" n48="" o48="" p48="" q48="" r48="" s48="" t48="" u48="" v48=""
+    />"#;
+    bencher.iter(move || {
+        let parser = EventReader::new(xml.as_bytes());
+        for e in parser {
+            e.unwrap();
+        }
+    });
+}
+
 #[bench]
 fn write(bencher: &mut Bencher) {
     let xml = std::fs::read("tests/documents/sample_1.xml").unwrap();
diff --git a/src/reader/indexset.rs b/src/reader/indexset.rs
index 82fb8e65..aaca88ca 100644
--- a/src/reader/indexset.rs
+++ b/src/reader/indexset.rs
@@ -16,6 +16,10 @@ pub(crate) struct AttributesSet {
     hasher: RandomState,
 }
 
+/// Number of attributes a set may hold before `push` starts filling the `HashSet`;
+/// smaller sets rely on linear search, because allocation costs more than a few comparisons.
+const HASH_THRESHOLD: usize = 8;
+
 impl AttributesSet {
     pub fn new() -> Self {
         Self {
@@ -33,12 +37,20 @@ impl AttributesSet {
 
     pub fn contains(&self, name: &OwnedName) -> bool {
         // fall back to linear search only on duplicate or hash collision
-        self.may_contain.contains(&self.hash(name)) &&
+        (self.vec.len() <= HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) && // `<=`: at exactly the threshold `push` has not hashed anything yet
             self.vec.iter().any(move |a| &a.name == name)
     }
 
     pub fn push(&mut self, attr: OwnedAttribute) {
-        self.may_contain.insert(self.hash(&attr.name));
+        if self.vec.len() >= HASH_THRESHOLD { // sets below the threshold are never hashed
+            if self.vec.len() == HASH_THRESHOLD { // threshold just reached: hash the names stored so far
+                self.may_contain.reserve(HASH_THRESHOLD * 2);
+                for attr in &self.vec { // shadows the `attr` parameter inside this loop only
+                    self.may_contain.insert(self.hash(&attr.name));
+                }
+            }
+            self.may_contain.insert(self.hash(&attr.name)); // hash of the attribute being added
+        }
         self.vec.push(attr);
     }