@@ -7,7 +7,44 @@ use crate::chunked_array::metadata::MetadataProperties;
7
7
use crate :: chunked_array:: object:: builder:: ObjectChunkedBuilder ;
8
8
use crate :: utils:: slice_offsets;
9
9
10
- #[ inline]
10
+ pub ( crate ) fn split_at (
11
+ chunks : & [ ArrayRef ] ,
12
+ offset : i64 ,
13
+ own_length : usize ,
14
+ ) -> ( Vec < ArrayRef > , Vec < ArrayRef > ) {
15
+ let mut new_chunks_left = Vec :: with_capacity ( 1 ) ;
16
+ let mut new_chunks_right = Vec :: with_capacity ( 1 ) ;
17
+ let ( raw_offset, _) = slice_offsets ( offset, 0 , own_length) ;
18
+
19
+ let mut remaining_offset = raw_offset;
20
+ let mut iter = chunks. iter ( ) ;
21
+
22
+ for chunk in & mut iter {
23
+ let chunk_len = chunk. len ( ) ;
24
+ if remaining_offset > 0 && remaining_offset >= chunk_len {
25
+ remaining_offset -= chunk_len;
26
+ new_chunks_left. push ( chunk. clone ( ) ) ;
27
+ continue ;
28
+ }
29
+
30
+ let ( l, r) = chunk. split_at_boxed ( remaining_offset) ;
31
+ new_chunks_left. push ( l) ;
32
+ new_chunks_right. push ( r) ;
33
+ break ;
34
+ }
35
+
36
+ for chunk in iter {
37
+ new_chunks_right. push ( chunk. clone ( ) )
38
+ }
39
+ if new_chunks_left. is_empty ( ) {
40
+ new_chunks_left. push ( chunks[ 0 ] . sliced ( 0 , 0 ) ) ;
41
+ }
42
+ if new_chunks_right. is_empty ( ) {
43
+ new_chunks_right. push ( chunks[ 0 ] . sliced ( 0 , 0 ) ) ;
44
+ }
45
+ ( new_chunks_left, new_chunks_right)
46
+ }
47
+
11
48
pub ( crate ) fn slice (
12
49
chunks : & [ ArrayRef ] ,
13
50
offset : i64 ,
@@ -136,12 +173,69 @@ impl<T: PolarsDataType> ChunkedArray<T> {
136
173
}
137
174
}
138
175
176
+ /// Split the array. The chunks are reallocated the underlying data slices are zero copy.
177
+ ///
178
+ /// When offset is negative it will be counted from the end of the array.
179
+ /// This method will never error,
180
+ /// and will slice the best match when offset, or length is out of bounds
181
+ pub fn split_at ( & self , offset : i64 ) -> ( Self , Self ) {
182
+ // A normal slice, slice the buffers and thus keep the whole memory allocated.
183
+ let ( l, r) = split_at ( & self . chunks , offset, self . len ( ) ) ;
184
+ let mut out_l = unsafe { self . copy_with_chunks ( l) } ;
185
+ let mut out_r = unsafe { self . copy_with_chunks ( r) } ;
186
+
187
+ use MetadataProperties as P ;
188
+ let mut properties_l = P :: SORTED | P :: FAST_EXPLODE_LIST ;
189
+ let mut properties_r = P :: SORTED | P :: FAST_EXPLODE_LIST ;
190
+
191
+ let is_ascending = self . is_sorted_ascending_flag ( ) ;
192
+ let is_descending = self . is_sorted_descending_flag ( ) ;
193
+
194
+ if is_ascending || is_descending {
195
+ let has_nulls_at_start = self . null_count ( ) != 0
196
+ && self
197
+ . chunks ( )
198
+ . first ( )
199
+ . unwrap ( )
200
+ . as_ref ( )
201
+ . validity ( )
202
+ . map_or ( false , |bm| bm. get ( 0 ) . unwrap ( ) ) ;
203
+
204
+ if !has_nulls_at_start {
205
+ let can_copy_min_value = !has_nulls_at_start && is_ascending;
206
+ let can_copy_max_value = !has_nulls_at_start && is_descending;
207
+
208
+ properties_l. set ( P :: MIN_VALUE , can_copy_min_value) ;
209
+ properties_l. set ( P :: MAX_VALUE , can_copy_max_value) ;
210
+ }
211
+
212
+ let has_nulls_at_end = self . null_count ( ) != 0
213
+ && self
214
+ . chunks ( )
215
+ . last ( )
216
+ . unwrap ( )
217
+ . as_ref ( )
218
+ . validity ( )
219
+ . map_or ( false , |bm| bm. get ( bm. len ( ) - 1 ) . unwrap ( ) ) ;
220
+
221
+ if !has_nulls_at_end {
222
+ let can_copy_min_value = !has_nulls_at_end && is_descending;
223
+ let can_copy_max_value = !has_nulls_at_end && is_ascending;
224
+ properties_r. set ( P :: MIN_VALUE , can_copy_min_value) ;
225
+ properties_r. set ( P :: MAX_VALUE , can_copy_max_value) ;
226
+ }
227
+ }
228
+ out_l. copy_metadata ( self , properties_l) ;
229
+ out_r. copy_metadata ( self , properties_r) ;
230
+
231
+ ( out_l, out_r)
232
+ }
233
+
139
234
/// Slice the array. The chunks are reallocated; the underlying data slices are zero copy.
140
235
///
141
236
/// When offset is negative it will be counted from the end of the array.
142
237
/// This method will never error,
143
238
/// and will slice the best match when offset, or length is out of bounds
144
- #[ inline]
145
239
pub fn slice ( & self , offset : i64 , length : usize ) -> Self {
146
240
// The len: 0 special cases ensure we release memory.
147
241
// A normal slice, slice the buffers and thus keep the whole memory allocated.
0 commit comments