@@ -2,14 +2,15 @@ use polars_utils::slice::load_padded_le_u64;
2
2
3
3
use crate :: bitmap:: { Bitmap , MutableBitmap } ;
4
4
use crate :: storage:: SharedStorage ;
5
+ use crate :: trusted_len:: TrustedLen ;
5
6
6
7
/// Used to build bitmaps bool-by-bool in sequential order.
///
/// Complete 64-bit words live in `bytes`; the bits of the current, not yet
/// complete word are staged in `buf`.
#[derive(Clone, Default)]
pub struct BitmapBuilder {
    /// A buffer containing the last `self.bit_len % 64` bits.
    buf: u64,
    /// Length in bits.
    bit_len: usize,
    /// Capacity in bits (always a multiple of 64).
    bit_cap: usize,
    /// The number of bits set in `self.bytes`, not including `self.buf`.
    set_bits_in_bytes: usize,
    /// Backing storage for the words that have already been flushed.
    bytes: Vec<u8>,
}
15
16
@@ -18,40 +19,52 @@ impl BitmapBuilder {
18
19
Self :: default ( )
19
20
}
20
21
22
+ #[ inline( always) ]
21
23
pub fn len ( & self ) -> usize {
22
- self . len
24
+ self . bit_len
23
25
}
24
26
27
+ #[ inline( always) ]
25
28
pub fn capacity ( & self ) -> usize {
26
- self . cap
29
+ self . bit_cap
30
+ }
31
+
32
+ #[ inline( always) ]
33
+ pub fn set_bits ( & self ) -> usize {
34
+ self . set_bits_in_bytes + self . buf . count_ones ( ) as usize
35
+ }
36
+
37
+ #[ inline( always) ]
38
+ pub fn unset_bits ( & self ) -> usize {
39
+ self . bit_len - self . set_bits ( )
27
40
}
28
41
29
42
pub fn with_capacity ( bits : usize ) -> Self {
30
43
let bytes = Vec :: with_capacity ( bits. div_ceil ( 64 ) * 8 ) ;
31
44
let words_available = bytes. capacity ( ) / 8 ;
32
45
Self {
33
46
buf : 0 ,
34
- len : 0 ,
35
- cap : words_available * 64 ,
36
- set_bits : 0 ,
47
+ bit_len : 0 ,
48
+ bit_cap : words_available * 64 ,
49
+ set_bits_in_bytes : 0 ,
37
50
bytes,
38
51
}
39
52
}
40
53
41
54
#[ inline( always) ]
42
55
pub fn reserve ( & mut self , additional : usize ) {
43
- if self . len + additional > self . cap {
56
+ if self . bit_len + additional > self . bit_cap {
44
57
self . reserve_slow ( additional)
45
58
}
46
59
}
47
60
48
61
#[ cold]
49
62
#[ inline( never) ]
50
63
fn reserve_slow ( & mut self , additional : usize ) {
51
- let bytes_needed = ( self . len + additional) . div_ceil ( 64 ) * 8 ;
64
+ let bytes_needed = ( self . bit_len + additional) . div_ceil ( 64 ) * 8 ;
52
65
self . bytes . reserve ( bytes_needed - self . bytes . len ( ) ) ;
53
66
let words_available = self . bytes . capacity ( ) / 8 ;
54
- self . cap = words_available * 64 ;
67
+ self . bit_cap = words_available * 64 ;
55
68
}
56
69
57
70
#[ inline( always) ]
@@ -75,46 +88,52 @@ impl BitmapBuilder {
75
88
/// self.len() < self.capacity() must hold.
76
89
#[ inline( always) ]
77
90
pub unsafe fn push_unchecked ( & mut self , x : bool ) {
78
- debug_assert ! ( self . len < self . cap ) ;
79
- self . buf |= ( x as u64 ) << ( self . len % 64 ) ;
80
- self . len += 1 ;
81
- if self . len % 64 == 0 {
91
+ debug_assert ! ( self . bit_len < self . bit_cap ) ;
92
+ self . buf |= ( x as u64 ) << ( self . bit_len % 64 ) ;
93
+ self . bit_len += 1 ;
94
+ if self . bit_len % 64 == 0 {
82
95
self . flush_word_unchecked ( self . buf ) ;
83
- self . set_bits += self . buf . count_ones ( ) as usize ;
96
+ self . set_bits_in_bytes += self . buf . count_ones ( ) as usize ;
84
97
self . buf = 0 ;
85
98
}
86
99
}
87
100
101
+ #[ inline( always) ]
88
102
pub fn extend_constant ( & mut self , length : usize , value : bool ) {
89
103
// Fast path if the extension still fits in buf with room left to spare.
90
- let bits_in_buf = self . len % 64 ;
104
+ let bits_in_buf = self . bit_len % 64 ;
91
105
if bits_in_buf + length < 64 {
92
106
let bit_block = ( ( value as u64 ) << length) - ( value as u64 ) ;
93
107
self . buf |= bit_block << bits_in_buf;
94
- self . len += length;
95
- return ;
108
+ self . bit_len += length;
109
+ } else {
110
+ self . extend_constant_slow ( length, value) ;
96
111
}
112
+ }
97
113
114
+ #[ cold]
115
+ fn extend_constant_slow ( & mut self , length : usize , value : bool ) {
98
116
unsafe {
99
117
let value_spread = if value { u64:: MAX } else { 0 } ; // Branchless neg.
100
118
101
119
// Extend and flush current buf.
102
120
self . reserve ( length) ;
121
+ let bits_in_buf = self . bit_len % 64 ;
103
122
let ext_buf = self . buf | ( value_spread << bits_in_buf) ;
104
123
self . flush_word_unchecked ( ext_buf) ;
105
- self . set_bits += ext_buf. count_ones ( ) as usize ;
124
+ self . set_bits_in_bytes += ext_buf. count_ones ( ) as usize ;
106
125
107
126
// Write complete words.
108
127
let remaining_bits = length - ( 64 - bits_in_buf) ;
109
128
let remaining_words = remaining_bits / 64 ;
110
129
for _ in 0 ..remaining_words {
111
130
self . flush_word_unchecked ( value_spread) ;
112
131
}
113
- self . set_bits += ( remaining_words * 64 ) & value_spread as usize ;
132
+ self . set_bits_in_bytes += ( remaining_words * 64 ) & value_spread as usize ;
114
133
115
134
// Put remainder in buf and update length.
116
135
self . buf = ( ( value as u64 ) << ( remaining_bits % 64 ) ) - ( value as u64 ) ;
117
- self . len += length;
136
+ self . bit_len += length;
118
137
}
119
138
}
120
139
@@ -123,21 +142,21 @@ impl BitmapBuilder {
123
142
/// # Safety
124
143
/// self.len + length <= self.cap and length <= 64 must hold.
125
144
pub unsafe fn push_word_with_len_unchecked ( & mut self , word : u64 , length : usize ) {
126
- debug_assert ! ( self . len + length <= self . cap ) ;
145
+ debug_assert ! ( self . bit_len + length <= self . bit_cap ) ;
127
146
debug_assert ! ( length <= 64 ) ;
128
147
debug_assert ! ( length == 64 || ( word >> length) == 0 ) ;
129
- let bits_in_buf = self . len % 64 ;
148
+ let bits_in_buf = self . bit_len % 64 ;
130
149
self . buf |= word << bits_in_buf;
131
150
if bits_in_buf + length >= 64 {
132
151
self . flush_word_unchecked ( self . buf ) ;
133
- self . set_bits += self . buf . count_ones ( ) as usize ;
152
+ self . set_bits_in_bytes += self . buf . count_ones ( ) as usize ;
134
153
self . buf = if bits_in_buf > 0 {
135
154
word >> ( 64 - bits_in_buf)
136
155
} else {
137
156
0
138
157
} ;
139
158
}
140
- self . len += length;
159
+ self . bit_len += length;
141
160
}
142
161
143
162
/// # Safety
@@ -168,24 +187,24 @@ impl BitmapBuilder {
168
187
slice = slice. get_unchecked ( offset / 8 ..) ;
169
188
170
189
// Write word-by-word.
171
- let bits_in_buf = self . len % 64 ;
190
+ let bits_in_buf = self . bit_len % 64 ;
172
191
if bits_in_buf > 0 {
173
192
while length >= 64 {
174
193
let word = u64:: from_le_bytes ( slice. get_unchecked ( 0 ..8 ) . try_into ( ) . unwrap ( ) ) ;
175
194
self . buf |= word << bits_in_buf;
176
195
self . flush_word_unchecked ( self . buf ) ;
177
- self . set_bits += self . buf . count_ones ( ) as usize ;
196
+ self . set_bits_in_bytes += self . buf . count_ones ( ) as usize ;
178
197
self . buf = word >> ( 64 - bits_in_buf) ;
179
- self . len += 64 ;
198
+ self . bit_len += 64 ;
180
199
length -= 64 ;
181
200
slice = slice. get_unchecked ( 8 ..) ;
182
201
}
183
202
} else {
184
203
while length >= 64 {
185
204
let word = u64:: from_le_bytes ( slice. get_unchecked ( 0 ..8 ) . try_into ( ) . unwrap ( ) ) ;
186
205
self . flush_word_unchecked ( word) ;
187
- self . set_bits += word. count_ones ( ) as usize ;
188
- self . len += 64 ;
206
+ self . set_bits_in_bytes += word. count_ones ( ) as usize ;
207
+ self . bit_len += 64 ;
189
208
length -= 64 ;
190
209
slice = slice. get_unchecked ( 8 ..) ;
191
210
}
@@ -206,27 +225,94 @@ impl BitmapBuilder {
206
225
}
207
226
}
208
227
228
+ pub fn extend_from_bitmap ( & mut self , bitmap : & Bitmap ) {
229
+ // TODO: we can perhaps use the bitmaps bitcount here instead of
230
+ // recomputing it if it has a known bitcount.
231
+ let ( slice, offset, length) = bitmap. as_slice ( ) ;
232
+ self . extend_from_slice ( slice, offset, length) ;
233
+ }
234
+
209
235
/// # Safety
210
236
/// May only be called once at the end.
211
237
unsafe fn finish ( & mut self ) {
212
- if self . len % 64 != 0 {
238
+ if self . bit_len % 64 != 0 {
213
239
self . bytes . extend_from_slice ( & self . buf . to_le_bytes ( ) ) ;
214
- self . set_bits += self . buf . count_ones ( ) as usize ;
240
+ self . set_bits_in_bytes += self . buf . count_ones ( ) as usize ;
241
+ self . buf = 0 ;
215
242
}
216
243
}
217
244
245
+ /// Converts this BitmapBuilder into a mutable bitmap.
218
246
pub fn into_mut ( mut self ) -> MutableBitmap {
219
247
unsafe {
220
248
self . finish ( ) ;
221
- MutableBitmap :: from_vec ( self . bytes , self . len )
249
+ MutableBitmap :: from_vec ( self . bytes , self . bit_len )
250
+ }
251
+ }
252
+
253
+ /// The same as into_mut, but returns None if the bitmap is all-ones.
254
+ pub fn into_opt_mut_validity ( mut self ) -> Option < MutableBitmap > {
255
+ unsafe {
256
+ self . finish ( ) ;
257
+ if self . set_bits_in_bytes == self . bit_len {
258
+ return None ;
259
+ }
260
+ Some ( MutableBitmap :: from_vec ( self . bytes , self . bit_len ) )
222
261
}
223
262
}
224
263
264
+ /// Freezes this BitmapBuilder into an immutable Bitmap.
225
265
pub fn freeze ( mut self ) -> Bitmap {
226
266
unsafe {
227
267
self . finish ( ) ;
228
268
let storage = SharedStorage :: from_vec ( self . bytes ) ;
229
- Bitmap :: from_inner_unchecked ( storage, 0 , self . len , Some ( self . len - self . set_bits ) )
269
+ Bitmap :: from_inner_unchecked (
270
+ storage,
271
+ 0 ,
272
+ self . bit_len ,
273
+ Some ( self . bit_len - self . set_bits_in_bytes ) ,
274
+ )
275
+ }
276
+ }
277
+
278
+ /// The same as freeze, but returns None if the bitmap is all-ones.
279
+ pub fn into_opt_validity ( mut self ) -> Option < Bitmap > {
280
+ unsafe {
281
+ self . finish ( ) ;
282
+ if self . set_bits_in_bytes == self . bit_len {
283
+ return None ;
284
+ }
285
+ let storage = SharedStorage :: from_vec ( self . bytes ) ;
286
+ let bitmap = Bitmap :: from_inner_unchecked (
287
+ storage,
288
+ 0 ,
289
+ self . bit_len ,
290
+ Some ( self . bit_len - self . set_bits_in_bytes ) ,
291
+ ) ;
292
+ Some ( bitmap)
230
293
}
231
294
}
295
+
296
+ pub fn extend_trusted_len_iter < I > ( & mut self , iterator : I )
297
+ where
298
+ I : Iterator < Item = bool > + TrustedLen ,
299
+ {
300
+ self . reserve ( iterator. size_hint ( ) . 1 . unwrap ( ) ) ;
301
+ for b in iterator {
302
+ // SAFETY: we reserved and the iterator's length is trusted.
303
+ unsafe {
304
+ self . push_unchecked ( b) ;
305
+ }
306
+ }
307
+ }
308
+
309
+ #[ inline]
310
+ pub fn from_trusted_len_iter < I > ( iterator : I ) -> Self
311
+ where
312
+ I : Iterator < Item = bool > + TrustedLen ,
313
+ {
314
+ let mut builder = Self :: new ( ) ;
315
+ builder. extend_trusted_len_iter ( iterator) ;
316
+ builder
317
+ }
232
318
}
0 commit comments